From 6d1fd3f8536bbc0e5e49b183aa10c6e31aac452d Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 23 Jun 2020 15:07:32 +0200 Subject: [PATCH 01/76] toggle interleaving using cmd line args, svm init --- CHANGELOG | 11 +- api/CMakeLists.txt | 1 + api/include/fftfpga/fftfpga.h | 4 +- api/src/aocl_mmd.h | 524 ++++++++++++++++++++++++++++++++++ api/src/fftfpga.c | 74 ++++- api/src/svm.c | 170 +++++++++++ api/src/svm.h | 10 + examples/fft2d.c | 5 +- examples/fft3d.c | 9 +- 9 files changed, 787 insertions(+), 21 deletions(-) create mode 100644 api/src/aocl_mmd.h create mode 100644 api/src/svm.c create mode 100644 api/src/svm.h diff --git a/CHANGELOG b/CHANGELOG index b6760ac..56b29f4 100755 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,8 +2,6 @@ All notable changes to this project will be documented in this file. ## Unreleased -- coarse grained SVM support for Intel FPGAs -- host code as library - configurable platform name - choice of platform (currently chooses first platform) - choice of device (currently chooses first device) @@ -11,7 +9,14 @@ All notable changes to this project will be documented in this file. 
- batch mode - xilinx fpgas -## [1.0.0] - [] +## [2.0.0] - [] + +- SVM API: coarse grained SVM support for Intel FPGAs +- Doxygen Doc +- Emulator option using runtime cmd line args +- Command line argument to enable and disable burst interleaved global memory accesses + +## [1.0.0] - [16.06.2020] ### Added - 3d FFT in 2 varients, in bram and ddr transpose diff --git a/api/CMakeLists.txt b/api/CMakeLists.txt index b5ba5d7..9654fab 100755 --- a/api/CMakeLists.txt +++ b/api/CMakeLists.txt @@ -10,6 +10,7 @@ project(fftfpga VERSION 0.3 ## add_library(${PROJECT_NAME} STATIC ${PROJECT_SOURCE_DIR}/src/fftfpga.c + ${PROJECT_SOURCE_DIR}/src/svm.c ${PROJECT_SOURCE_DIR}/src/opencl_utils.c ${PROJECT_SOURCE_DIR}/src/misc.c) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 96f5557..d8a7bcc 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -39,13 +39,13 @@ extern fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int ite extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter); // Single Precision 2d FFT using BRAM -extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv); +extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving); // Single Precision 2d FFT using DDR extern fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv); // Single Precision in BRAM 3d FFT -extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv); +extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving); // Single Precision in DDR 3d FFT extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv); diff --git a/api/src/aocl_mmd.h b/api/src/aocl_mmd.h new file mode 100644 index 0000000..6adadca --- /dev/null +++ b/api/src/aocl_mmd.h @@ -0,0 +1,524 @@ +#ifndef AOCL_MMD_H +#define AOCL_MMD_H + +/* (C) 1992-2019 Intel Corporation. 
*/ +/* Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words */ +/* and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. */ +/* and/or other countries. Other marks and brands may be claimed as the property */ +/* of others. See Trademarks on intel.com for full list of Intel trademarks or */ +/* the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) */ +/* Your use of Intel Corporation's design tools, logic functions and other */ +/* software and tools, and its AMPP partner logic functions, and any output */ +/* files any of the foregoing (including device programming or simulation */ +/* files), and any associated documentation or information are expressly subject */ +/* to the terms and conditions of the Altera Program License Subscription */ +/* Agreement, Intel MegaCore Function License Agreement, or other applicable */ +/* license agreement, including, without limitation, that your use is for the */ +/* sole purpose of programming logic devices manufactured by Intel and sold by */ +/* Intel or its authorized distributors. Please refer to the applicable */ +/* agreement for further details. */ + + +#ifdef __cplusplus +extern "C" { +#endif + +/* Support for memory mapped ACL devices. + * + * Typical API lifecycle, from the perspective of the caller. + * + * 1. aocl_mmd_open must be called first, to provide a handle for further + * operations. + * + * 2. The interrupt and status handlers must be set. + * + * 3. Read and write operations are performed. + * + * 4. aocl_mmd_close may be called to shut down the device. No further + * operations are permitted until a subsequent aocl_mmd_open call. + * + * aocl_mmd_get_offline_info can be called anytime including before + * open. aocl_mmd_get_info can be called anytime between open and close. 
+ */ + +#ifndef AOCL_MMD_CALL +#if defined(_WIN32) +#define AOCL_MMD_CALL __declspec(dllimport) +#else +#define AOCL_MMD_CALL +#endif +#endif + +#ifndef WEAK +#if defined(_WIN32) +#define WEAK +#else +#define WEAK __attribute__((weak)) +#endif +#endif + +/* The MMD API's version - the runtime expects this string when + * AOCL_MMD_VERSION is queried. This changes only if the API has changed */ +#define AOCL_MMD_VERSION_STRING "18.1" + +/* Memory types that can be supported - bitfield. Other than physical memory + * these types closely align with the OpenCL SVM types. + * + * AOCL_MMD_PHYSICAL_MEMORY - The vendor interface includes IP to communicate + * directly with physical memory such as DDR, QDR, etc. + * + * AOCL_MMD_SVM_COARSE_GRAIN_BUFFER - The vendor interface includes support for + * caching SVM pointer data andy requires explicit function calls from the user + * to sychronize the cache between the host processor and the FPGA. This level + * of SVM is not currently supported by Altera except as a subset of + * SVM_FINE_GAIN_SYSTEM support. + * + * AOCL_MMD_SVM_FINE_GRAIN_BUFFER - The vendor interface includes support for + * caching SVM pointer data and requires additional information from the user + * and/or host runtime that can be collected during pointer allocation in order + * to sychronize the cache between the host processor and the FPGA. Once this + * additional data is provided for an SVM pointer, the vendor interface handles + * cache synchronization between the host processor & the FPGA automatically. + * This level of SVM is not currently supported by Altera except as a subset + * of SVM_FINE_GRAIN_SYSTEM support. + * + * AOCL_MMD_SVM_FINE_GRAIN_SYSTEM - The vendor interface includes support for + * caching SVM pointer data and does not require any additional information to + * sychronize the cache between the host processor and the FPGA. 
The vendor + * interface handles cache synchronization between the host processor & the + * FPGA automatically for all SVM pointers. This level of SVM support is + * currently under development by Altera and some features may not be fully + * supported. + */ +#define AOCL_MMD_PHYSICAL_MEMORY (1 << 0) +#define AOCL_MMD_SVM_COARSE_GRAIN_BUFFER (1 << 1) +#define AOCL_MMD_SVM_FINE_GRAIN_BUFFER (1 << 2) +#define AOCL_MMD_SVM_FINE_GRAIN_SYSTEM (1 << 3) + +/* program modes - bitfield + * + * AOCL_MMD_PROGRAM_PRESERVE_GLOBAL_MEM - preserve contents of global memory + * when this bit is is set to 1. If programming can't occur without preserving + * global memory contents, the program function must fail, in which case the + * runtime may re-invoke program with this bit set to 0, allowing programming + * to occur even if doing so destroys global memory contents. + * + * more modes are reserved for stacking on in the future + */ +#define AOCL_MMD_PROGRAM_PRESERVE_GLOBAL_MEM (1 << 0) +typedef int aocl_mmd_program_mode_t; + +typedef void* aocl_mmd_op_t; + +typedef struct { + unsigned lo; /* 32 least significant bits of time value. */ + unsigned hi; /* 32 most significant bits of time value. */ +} aocl_mmd_timestamp_t; + + +/* Defines the set of characteristics that can be probed about the board before + * opening a device. The type of data returned by each is specified in + * parentheses in the adjacent comment. + * + * AOCL_MMD_NUM_BOARDS and AOCL_MMD_BOARD_NAMES + * These two fields can be used to implement multi-device support. The MMD + * layer may have a list of devices it is capable of interacting with, each + * identified with a unique name. The length of the list should be returned + * in AOCL_MMD_NUM_BOARDS, and the names of these devices returned in + * AOCL_MMD_BOARD_NAMES. The OpenCL runtime will try to call aocl_mmd_open + * for each board name returned in AOCL_MMD_BOARD_NAMES. 
+ * + * */ +typedef enum { + AOCL_MMD_VERSION = 0, /* Version of MMD (char*)*/ + AOCL_MMD_NUM_BOARDS = 1, /* Number of candidate boards (int)*/ + AOCL_MMD_BOARD_NAMES = 2, /* Names of boards available delimiter=; (char*)*/ + AOCL_MMD_VENDOR_NAME = 3, /* Name of vendor (char*) */ + AOCL_MMD_VENDOR_ID = 4, /* An integer ID for the vendor (int) */ + AOCL_MMD_USES_YIELD = 5, /* 1 if yield must be called to poll hw (int) */ + /* The following can be combined in a bit field: + * AOCL_MMD_PHYSICAL_MEMORY, AOCL_MMD_SVM_COARSE_GRAIN_BUFFER, AOCL_MMD_SVM_FINE_GRAIN_BUFFER, AOCL_MMD_SVM_FINE_GRAIN_SYSTEM. + * Prior to 14.1, all existing devices supported physical memory and no types of SVM memory, so this + * is the default when this operation returns '0' for board MMDs with a version prior to 14.1 + */ + AOCL_MMD_MEM_TYPES_SUPPORTED = 6, +} aocl_mmd_offline_info_t; + +/* Defines the set of characteristics that can be probed about the board after + * opening a device. This can involve communication to the device + * + * AOCL_MMD_NUM_KERNEL_INTERFACES - The number of kernel interfaces, usually 1 + * + * AOCL_MMD_KERNEL_INTERFACES - the handle for each kernel interface. + * param_value will have size AOCL_MMD_NUM_KERNEL_INTERFACES * sizeof int + * + * AOCL_MMD_PLL_INTERFACES - the handle for each pll associated with each + * kernel interface. 
If a kernel interface is not clocked by acl_kernel_clk + * then return -1 + * + * */ +typedef enum { + AOCL_MMD_NUM_KERNEL_INTERFACES = 1, /* Number of Kernel interfaces (int) */ + AOCL_MMD_KERNEL_INTERFACES = 2, /* Kernel interface (int*) */ + AOCL_MMD_PLL_INTERFACES = 3, /* Kernel clk handles (int*) */ + AOCL_MMD_MEMORY_INTERFACE = 4, /* Global memory handle (int) */ + AOCL_MMD_TEMPERATURE = 5, /* Temperature measurement (float) */ + AOCL_MMD_PCIE_INFO = 6, /* PCIe information (char*) */ + AOCL_MMD_BOARD_NAME = 7, /* Name of board (char*) */ + AOCL_MMD_BOARD_UNIQUE_ID = 8, /* Unique ID of board (int) */ + AOCL_MMD_POWER = 9, /* Power usage of board (Watts) (float) */ + AOCL_MMD_NALLA_DIAGNOSTIC = 10, /* Board specific diagnostics. */ + AOCL_MMD_DEVICE_POWER = 11, /* Power usage of device (Watts) (float) */ + AOCL_MMD_MAC0_ADDRESS = 12, /* MAC address access of modules */ + AOCL_MMD_MAC1_ADDRESS = 13, + AOCL_MMD_MAC2_ADDRESS = 14, + AOCL_MMD_MAC3_ADDRESS = 15, + AOCL_MMD_SCH0_STATUS = 16, + AOCL_MMD_SCH1_STATUS = 17, + AOCL_MMD_SCH2_STATUS = 18, + AOCL_MMD_SCH3_STATUS = 19, + AOCL_MMD_PR_ID = 20, + AOCL_MMD_CONCURRENT_READS = 21, /* # of parallel reads; 1 is serial*/ + AOCL_MMD_CONCURRENT_WRITES = 22, /* # of parallel writes; 1 is serial*/ + AOCL_MMD_CONCURRENT_READS_OR_WRITES = 23, /* total # of concurent operations read + writes*/ + AOCL_MMD_QSFP0_INFO = 24, /*Gets QSFP0 specific info and also setups LR modules if required */ + AOCL_MMD_QSFP1_INFO = 25, /*Gets QSFP1 specific info and also setups LR modules if required */ + AOCL_MMD_QSFP2_INFO = 26, /*Gets QSFP2 specific info and also setups LR modules if required */ + AOCL_MMD_QSFP3_INFO = 27 /*Gets QSFP3 specific info and also setups LR modules if required */ +} aocl_mmd_info_t; + + +typedef struct { + unsigned long long int exception_type; + void *user_private_info; + size_t user_cb; +}aocl_mmd_interrupt_info; + +typedef void (*aocl_mmd_interrupt_handler_fn)( int handle, void* user_data ); +typedef void 
(*aocl_mmd_device_interrupt_handler_fn)( int handle, aocl_mmd_interrupt_info* data_in, void* user_data ); +typedef void (*aocl_mmd_status_handler_fn)( int handle, void* user_data, aocl_mmd_op_t op, int status ); + + +/* Get information about the board using the enum aocl_mmd_offline_info_t for + * offline info (called without a handle), and the enum aocl_mmd_info_t for + * info specific to a certain board. + * Arguments: + * + * requested_info_id - a value from the aocl_mmd_offline_info_t enum + * + * param_value_size - size of the param_value field in bytes. This should + * match the size of the return type expected as indicated in the enum + * definition. For example, the AOCL_MMD_TEMPERATURE returns a float, so + * the param_value_size should be set to sizeof(float) and you should + * expect the same number of bytes returned in param_size_ret. + * + * param_value - pointer to the variable that will receive the returned info + * + * param_size_ret - receives the number of bytes of data actually returned + * + * Returns: a negative value to indicate error. 
+ */ +AOCL_MMD_CALL int aocl_mmd_get_offline_info( + aocl_mmd_offline_info_t requested_info_id, + size_t param_value_size, + void* param_value, + size_t* param_size_ret ) WEAK; + +AOCL_MMD_CALL int aocl_mmd_get_info( + int handle, + aocl_mmd_info_t requested_info_id, + size_t param_value_size, + void* param_value, + size_t* param_size_ret ) WEAK; + +AOCL_MMD_CALL int aocl_mmd_card_info( + const char * device_name, + aocl_mmd_info_t requested_info_id, + size_t param_value_size, + void* param_value, + size_t* param_size_ret ); + +/*HPC Serial channel status and control functions for access via extention function pointer access in opencl */ +AOCL_MMD_CALL int aocl_mmd_sch_status (const char * device_name, size_t channel_number, unsigned int* param_value); +AOCL_MMD_CALL int aocl_mmd_sch_ctrl (const char * device_name, size_t channel_number, unsigned int param_value); +AOCL_MMD_CALL int aocl_mmd_sch_perfctrl (const char * device_name, size_t channel_number, unsigned int param_value); +AOCL_MMD_CALL int aocl_mmd_sch_rxperf (const char * device_name, size_t channel_number, unsigned int* param_value); +AOCL_MMD_CALL int aocl_mmd_sch_txperf (const char * device_name, size_t channel_number, unsigned int* param_value); + +/* Open and initialize the named device. + * + * The name is typically one specified by the AOCL_MMD_BOARD_NAMES offline + * info. + * + * Arguments: + * name - open the board with this name (provided as a C-style string, + * i.e. NUL terminated ASCII.) + * + * Returns: the non-negative integer handle for the board, otherwise a + * negative value to indicate error. Upon receiving the error, the OpenCL + * runtime will proceed to open other known devices, hence the MMD mustn't + * exit the application if an open call fails. + */ +AOCL_MMD_CALL int aocl_mmd_open(const char *name) WEAK; + +/* Close an opened device, by its handle. + * Returns: 0 on success, negative values on error. 
+ */ +AOCL_MMD_CALL int aocl_mmd_close(int handle) WEAK; + +/* Set the interrupt handler for the opened device. + * The interrupt handler is called whenever the client needs to be notified + * of an asynchronous event signalled by the device internals. + * For example, the kernel has completed or is stalled. + * + * Important: Interrupts from the kernel must be ignored until this handler is + * set + * + * Arguments: + * fn - the callback function to invoke when a kernel interrupt occurs + * user_data - the data that should be passed to fn when it is called. + * + * Returns: 0 if successful, negative on error + */ +AOCL_MMD_CALL int aocl_mmd_set_interrupt_handler( int handle, aocl_mmd_interrupt_handler_fn fn, void* user_data ) WEAK; + +/* Set the device interrupt handler for the opened device. + * The device interrupt handler is called whenever the client needs to be notified + * of a device event signalled by the device internals. + * For example, an ECC error has been reported. + * + * Important: Interrupts from the device must be ignored until this handler is + * set + * + * Arguments: + * fn - the callback function to invoke when a device interrupt occurs + * user_data - the data that should be passed to fn when it is called. + * + * Returns: 0 if successful, negative on error + */ +AOCL_MMD_CALL int aocl_mmd_set_device_interrupt_handler( int handle, aocl_mmd_device_interrupt_handler_fn fn, void* user_data ) WEAK; + +/* Set the operation status handler for the opened device. + * The operation status handler is called with + * status 0 when the operation has completed successfully. + * status negative when the operation completed with errors. + * + * Arguments: + * fn - the callback function to invoke when a status update is to be + * performed. + * user_data - the data that should be passed to fn when it is called. 
+ * + * Returns: 0 if successful, negative on error + */ +AOCL_MMD_CALL int aocl_mmd_set_status_handler( int handle, aocl_mmd_status_handler_fn fn, void* user_data ) WEAK; + +/* If AOCL_MMD_USES_YIELD is 1, this function is called when the host is idle + * and hence possibly waiting for events to be processed by the device. + * If AOCL_MMD_USES_YIELD is 0, this function is never called and the MMD is + * assumed to provide status/event updates via some other execution thread + * such as through an interrupt handler. + * + * Returns: non-zero if the yield function performed useful work such as + * processing DMA transactions, 0 if there is no useful work to be performed + * + * NOTE: yield may be called continuously as long as it reports that it has useful work + */ +AOCL_MMD_CALL int aocl_mmd_yield(int handle) WEAK; + +/* Read, write and copy operations on a single interface. + * If op is NULL + * - Then these calls must block until the operation is complete. + * - The status handler is not called for this operation. + * + * If op is non-NULL, then: + * - These may be non-blocking calls + * - The status handler must be called upon completion, with status 0 + * for success, and a negative value for failure. + * + * Arguments: + * op - the operation object used to track this operations progress + * + * len - the size in bytes to transfer + * + * src - the host buffer being read from + * + * dst - the host buffer being written to + * + * mmd_interface - the handle to the interface being accessed. E.g. To + * access global memory this handle will be whatever is returned by + * aocl_mmd_get_info when called with AOCL_MMD_MEMORY_INTERFACE. + * + * offset/src_offset/dst_offset - the byte offset within the interface that + * the transfer will begin at. + * + * The return value is 0 if the operation launch was successful, and + * negative otherwise. 
+ */ +AOCL_MMD_CALL int aocl_mmd_read( + int handle, + aocl_mmd_op_t op, + size_t len, + void* dst, + int mmd_interface, size_t offset ) WEAK; +AOCL_MMD_CALL int aocl_mmd_write( + int handle, + aocl_mmd_op_t op, + size_t len, + const void* src, + int mmd_interface, size_t offset ) WEAK; +AOCL_MMD_CALL int aocl_mmd_copy( + int handle, + aocl_mmd_op_t op, + size_t len, + int mmd_interface, size_t src_offset, size_t dst_offset ) WEAK; + +/* Host Channel create operation + * Opens channel between host and kernel. + * + * Arguments: + * channel_name - name of channel to initialize. Same name as used in board_spec.xml + * + * queue_depth - the size in bytes of pinned memory queue in system memory + * + * direction - the direction of the channel + * + * The return value is negative if initialization was unsuccessful, and + * positive otherwise. Positive return value is handle to the channel to be used for + * subsequent calls for the channel. + */ +AOCL_MMD_CALL int aocl_mmd_hostchannel_create( + int handle, + char *channel_name, + size_t queue_depth, + int direction) WEAK; + +/* Host Channel destroy operation + * Closes channel between host and kernel. + * + * Arguments: + * channel - the handle to the channel to close, that was obtained with + * create channel + * + * The return value is 0 if the destroy was successful, and negative + * otherwise. + */ +AOCL_MMD_CALL int aocl_mmd_hostchannel_destroy( + int handle, + int channel) WEAK; + +/* Host Channel get buffer operation + * Provide host with pointer to buffer they can access to to write or + * read from kernel, along with space or data available in the buffer + * in bytes. + * + * Arguments: + * channel - the handle to the channel to get the buffer for + * + * buffer_size - the address that this call will write the amount of + * space or data that's available in the buffer, + * depending on direction of the channel, in bytes + * + * status - the address that this call will write to for result of this + * call. 
Value will be 0 for success, and negative otherwise + * + * The return value is the pointer to the buffer that host can write + * to or read from. NULL if the status is negative. + */ +AOCL_MMD_CALL void *aocl_mmd_hostchannel_get_buffer( + int handle, + int channel, + size_t *buffer_size, + int *status) WEAK; + +/* Host Channel acknowledge buffer operation + * Acknowledge to the channel that the user has written or read data from + * it. This will make the data or additional buffer space available to + * write to or read from kernel. + * + * Arguments: + * channel - the handle to the channel that user is acknowledging + * + * send_size - the size in bytes that the user is acknowledging + * + * status - the address that this call will write to for result of this + * call. Value will be 0 for success, and negative otherwise + * + * The return value is equal to send_size if send_size was less than or + * equal to the buffer_size from get buffer call. If send_size was + * greater, then return value is the amount that was actually sent. + */ +AOCL_MMD_CALL size_t aocl_mmd_hostchannel_ack_buffer( + int handle, + int channel, + size_t send_size, + int *status) WEAK; + +/* Program the device + * + * The host will guarantee that no operations are currently executing on the + * device. That means the kernels will be idle and no read/write/copy + * commands are active. Interrupts should be disabled and the FPGA should + * be reprogrammed with the data from user_data which has size size. The host + * will then call aocl_mmd_set_status_handler and aocl_mmd_set_interrupt_handler + * again. At this point interrupts can be enabled. + * + * The new handle to the board after reprogram does not have to be the same as + * the one before. + * + * Arguments: + * user_data - The binary contents of the fpga.bin file created during + * Quartus II compilation. + * size - the size in bytes of user_data + * program_mode - bit field for programming attributes. 
See + * aocl_mmd_program_mode_t definition + * + * Returns: the new non-negative integer handle for the board, otherwise a + * negative value to indicate error. + */ +AOCL_MMD_CALL int aocl_mmd_program( int handle, void * user_data, size_t size, aocl_mmd_program_mode_t program_mode) WEAK; + + +/* Shared memory allocator + * Allocates memory that is shared between the host and the FPGA. The + * host will access this memory using the pointer returned by + * aocl_mmd_shared_mem_alloc, while the FPGA will access the shared memory + * using device_ptr_out. If shared memory is not supported this should return + * NULL. + * + * Shared memory survives FPGA reprogramming if the CPU is not rebooted. + * + * Arguments: + * size - the size of the shared memory to allocate + * device_ptr_out - will receive the pointer value used by the FPGA (the device) + * to access the shared memory. Cannot be NULL. The type is + * unsigned long long to handle the case where the host has a + * smaller pointer size than the device. + * + * Returns: The pointer value to be used by the host to access the shared + * memory if successful, otherwise NULL. + */ +AOCL_MMD_CALL void * aocl_mmd_shared_mem_alloc( int handle, size_t size, unsigned long long *device_ptr_out ) WEAK; + +/* Shared memory de-allocator + * Frees previously allocated shared memory. If shared memory is not supported, + * this function should do nothing. + * + * Arguments: + * host_ptr - the host pointer that points to the shared memory, as returned by + * aocl_mmd_shared_mem_alloc + * size - the size of the shared memory to free. Must match the size + * originally passed to aocl_mmd_shared_mem_alloc + */ +AOCL_MMD_CALL void aocl_mmd_shared_mem_free ( int handle, void* host_ptr, size_t size ) WEAK; + +/* DEPRECATED. 
Use aocl_mmd_program instead + * This reprogram API is only for mmd version previous than 18.1 +*/ +AOCL_MMD_CALL int aocl_mmd_reprogram( int handle, void * user_data, size_t size) WEAK; + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 1a6c2a1..ec056e9 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -9,6 +9,7 @@ #include "CL/opencl.h" #include "fftfpga/fftfpga.h" +#include "svm.h" #include "opencl_utils.h" #include "misc.h" @@ -23,7 +24,7 @@ static cl_command_queue queue1 = NULL, queue2 = NULL, queue3 = NULL; static cl_command_queue queue4 = NULL, queue5 = NULL, queue6 = NULL; //static int svm_handle; -//static int svm_enabled = 0; +static int svm_enabled = 0; #endif static void queue_setup(); @@ -80,6 +81,7 @@ void* fftfpgaf_complex_malloc(size_t sz, int svm){ -2 Unable to find platform passed as argument -3 Unable to find devices for given OpenCL platform -4 Failed to create program, file not found in path + -5 Device does not support required SVM */ int fpga_initialize(const char *platform_name, const char *path, int use_svm, int use_emulator){ @@ -112,6 +114,17 @@ int fpga_initialize(const char *platform_name, const char *path, int use_svm, in // use the first device. device = devices[0]; + if(use_svm){ + if(!check_valid_svm_device(device)){ + return -5; + } + else{ + printf("Supports SVM \n"); + svm_enabled = 1; + return -6; + } + } + // Create the context. 
context = clCreateContext(NULL, 1, &device, NULL, NULL, &status); checkError(status, "Failed to create context"); @@ -375,8 +388,8 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter){ /** * \brief compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA * \param N : integer pointer to size of FFT2d - * \param inp : float2 pointer to input data of size N - * \param out : float2 pointer to output data of size N + * \param inp : float2 pointer to input data of size [N * N] + * \param out : float2 pointer to output data of size [N * N] * \param inv : int toggle to activate backward FFT * \param iter : int toggle to activate backward FFT * \return fpga_t : time taken in milliseconds for data transfers and execution @@ -499,12 +512,13 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv){ /** * \brief compute an out-of-place single precision complex 2D-FFT using the BRAM of the FPGA * \param N : integer pointer to size of FFT2d - * \param inp : float2 pointer to input data of size N - * \param out : float2 pointer to output data of size N + * \param inp : float2 pointer to input data of size [N * N] + * \param out : float2 pointer to output data of size [N * N] * \param inv : int toggle to activate backward FFT + * \param interleaving : 1 if interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv){ +fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; cl_kernel fetch_kernel = NULL, transpose_kernel = NULL, store_kernel = NULL; @@ -523,12 +537,21 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv){ queue_setup(); + cl_mem_flags flagbuf1, flagbuf2; + if(interleaving == 1){ + flagbuf1 = CL_MEM_READ_WRITE; + flagbuf2 = CL_MEM_READ_WRITE; 
+ } + else{ + flagbuf1 = CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA; + flagbuf2 = CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA; + } // Device memory buffers cl_mem d_inData, d_outData; - d_inData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, NULL, &status); + d_inData = clCreateBuffer(context, flagbuf1, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - d_outData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, NULL, &status); + d_outData = clCreateBuffer(context, flagbuf2, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device @@ -628,8 +651,16 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv){ return fft_time; } - -fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv) { +/** + * \brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA + * \param N : integer size of the FFT3d along each dimension + * \param inp : float2 pointer to input data of size [N * N * N] + * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param interleaving : 1 if using burst interleaved global memory buffers + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel fft_kernel = NULL, fft_kernel_2 = NULL; cl_kernel fetch_kernel = NULL, transpose_kernel = NULL, transpose_kernel_2 = NULL; @@ -646,11 +677,21 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv) { queue_setup(); + cl_mem_flags flagbuf1, flagbuf2; + if(interleaving == 1){ + flagbuf1 = CL_MEM_READ_WRITE; + flagbuf2 = CL_MEM_READ_WRITE; + } + else{ + flagbuf1 = CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA; + flagbuf2 =
CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA; + } + // Device memory buffers cl_mem d_inData, d_outData; - d_inData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * N * N * N, NULL, &status); + d_inData = clCreateBuffer(context, flagbuf1, sizeof(float2) * N * N * N, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - d_outData = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * N * N, NULL, &status); + d_outData = clCreateBuffer(context, flagbuf2, sizeof(float2) * N * N * N, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device @@ -748,6 +789,15 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv) { return fft_time; } +/** + * \brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA + * \param N : integer size of the FFT3d along each dimension + * \param inp : float2 pointer to input data of size [N * N * N] + * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \note no interleaving toggle: this variant takes only N, inp, out and inv + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL, fftc_kernel = NULL; diff --git a/api/src/svm.c b/api/src/svm.c new file mode 100644 index 0000000..05986f5 --- /dev/null +++ b/api/src/svm.c @@ -0,0 +1,170 @@ +#define CL_VERSION_2_0 +#include <stdio.h> +#include <stdbool.h> +#include "CL/opencl.h" +#include "aocl_mmd.h" +#include "svm.h" +#include "opencl_utils.h" + +/* +int replace(){ + const char* board_name; + aocl_mmd_offline_info_t info_id; + aocl_mmd_get_offline_info(info_id, ); + + return aocl_mmd_open(board_name); +} +*/ + +/** + * @brief Check if device supports SVM + * @param
device + * @return true if supported and false if not + */ +bool check_valid_svm_device(cl_device_id device){ + cl_device_svm_capabilities caps = 0; + cl_int status; + + status = clGetDeviceInfo( + device, + CL_DEVICE_SVM_CAPABILITIES, + sizeof(cl_device_svm_capabilities), + &caps, + 0 + ); + checkError(status, "Failed to get device info"); + + if(caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER){ + fprintf(stderr, "Found CL_DEVICE_SVM_FINE_GRAIN_BUFFER. API support in progress\n"); + return false; + } + else if(caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM){ + fprintf(stderr, "Found CL_DEVICE_SVM_FINE_GRAIN_SYSTEM. API support in progress\n"); + return false; + } + else if(caps & CL_DEVICE_SVM_ATOMICS){ + fprintf(stderr, "Found CL_DEVICE_SVM_ATOMICS. API support in progress\n"); + return false; + } + else if (caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER){ + return true; + } + else{ + fprintf(stderr, "No SVM Support found!"); + return false; + } + return false; +} + + // Transfer Data to Global Memory or allocate SVM buffer + /* + size_t buf_size = sizeof(double2) * N * iter; + (double2 *)aocl_mmd_shared_mem_alloc(, buf_size, ) + h_inData = (double2 *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(double2) * N * iter, 0); + + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(double2) * N * iter, 0); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, + (void *)h_inData, sizeof(double2) * N * iter, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // Copy data from input file to SVM allocated memory. 
+ for (int i = 0; i < N * iter; i++) { + h_inData[i].x = inp[i].x; + h_inData[i].y = inp[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + // Can't pass bool to device, so convert it to int + int inverse_int = inv; + + // Create Kernels - names must match the kernel name in the original CL file + kernel1 = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); + + kernel2 = clCreateKernel(program, "fft1d", &status); + checkError(status, "Failed to create fft1d kernel"); + // Set the kernel arguments + + status = clSetKernelArgSVMPointer(kernel1, 0, (void *)h_inData); + checkError(status, "Failed to set kernel1 arg 0"); + + status = clSetKernelArgSVMPointer(kernel2, 0, (void *)h_outData); + checkError(status, "Failed to set kernel1 arg 0"); + status = clSetKernelArgSVMPointer(kernel1, 0, (void *)h_inData); + checkError(status, "Failed to set kernel1 arg 0"); + + status = clSetKernelArgSVMPointer(kernel2, 0, (void *)h_outData); + + checkError(status, "Failed to set kernel arg 0"); + status = clSetKernelArg(kernel2, 1, sizeof(cl_int), (void*)&iter); + checkError(status, "Failed to set kernel arg 1"); + status = clSetKernelArg(kernel2, 2, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set kernel arg 2"); + + printf(inverse_int ? 
"\tInverse FFT" : "\tFFT"); + printf(" kernel initialization is complete.\n"); + + // Get the itertamp to evaluate performance + fft_time.exec_t = getTimeinMilliSec(); + + // Launch the kernel - we launch a single work item hence enqueue a task + status = clEnqueueTask(queue1, kernel1, 0, NULL, NULL); + checkError(status, "Failed to launch kernel"); + + size_t ls = N/8; + size_t gs = iter * ls; + status = clEnqueueNDRangeKernel(queue1, kernel2, 1, NULL, &gs, &ls, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + // Wait for command queue to complete pending events + status = clFinish(queue1); + checkError(status, "Failed to finish"); + status = clFinish(queue2); + checkError(status, "Failed to finish queue1"); + + // Record execution time + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * N * iter, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + // Copy data from input file to SVM allocated memory. 
+ for (int i = 0; i < N * iter; i++) { + out[i].x = h_outData[i].x; + out[i].y = h_outData[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + // Cleanup + if(kernel1) + clReleaseKernel(kernel1); + if(kernel2) + clReleaseKernel(kernel2); + queue_cleanup(); + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); + } + */ + + + // return if SVM enabled but no device supported + /* + if(use_svm){ + // TODO: emulation and svm + if (check_valid_svm_device(device)){ + svm_enabled = 1; + } + else{ + fpga_final(); + return 1; + } + return 1; + } + */ \ No newline at end of file diff --git a/api/src/svm.h b/api/src/svm.h new file mode 100644 index 0000000..2fea4cc --- /dev/null +++ b/api/src/svm.h @@ -0,0 +1,10 @@ +// Author: Arjun Ramaswami + +#ifndef SVM_H +#define SVM_H + +#include + +bool check_valid_svm_device(cl_device_id device); + +#endif \ No newline at end of file diff --git a/examples/fft2d.c b/examples/fft2d.c index 619349d..1fb44fe 100644 --- a/examples/fft2d.c +++ b/examples/fft2d.c @@ -18,7 +18,7 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 2, iter = 1, inv = 0, sp = 0, use_bram = 0; + int N = 64, dim = 2, iter = 1, inv = 0, sp = 0, use_bram = 0, interleaving = 0; char *path = "fft2d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; fpga_t timing = {0.0, 0.0, 0.0, 0}; @@ -36,6 +36,7 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), + OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), OPT_END(), }; @@ -74,7 +75,7 @@ int main(int argc, const char **argv) { if(use_bram == 1){ // use bram for 2d Transpose temp_timer = getTimeinMilliseconds(); - 
timing = fftfpgaf_c2c_2d_bram(N, inp, out, inv); + timing = fftfpgaf_c2c_2d_bram(N, inp, out, inv, interleaving); total_api_time += getTimeinMilliseconds() - temp_timer; } else{ diff --git a/examples/fft3d.c b/examples/fft3d.c index 01219b0..61cdeb1 100755 --- a/examples/fft3d.c +++ b/examples/fft3d.c @@ -18,7 +18,7 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0; + int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, interleaving = 0; char *path = "fft3d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; fpga_t timing = {0.0, 0.0, 0.0, 0}; @@ -36,6 +36,7 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), + OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), OPT_END(), }; @@ -52,6 +53,10 @@ int main(int argc, const char **argv) { if(isInit != 0){ return EXIT_FAILURE; } + else if (isInit == -6){ + printf("SVM Found \n"); + return EXIT_SUCCESS; + } if(sp == 0){ printf("Not implemented. 
Work in Progress\n"); @@ -75,7 +80,7 @@ int main(int argc, const char **argv) { if(use_bram == 1){ // use bram for 3d Transpose temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_bram(N, inp, out, inv); + timing = fftfpgaf_c2c_3d_bram(N, inp, out, inv, interleaving); total_api_time += getTimeinMilliseconds() - temp_timer; } else{ From de65c7d3fd7e8ab52c486d74463a942bb001a523 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 23 Jun 2020 15:08:12 +0200 Subject: [PATCH 02/76] modified tests based on interleaving args --- tests/test_fft2d_fpga.cpp | 8 ++++---- tests/test_fft3d_fpga.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_fft2d_fpga.cpp b/tests/test_fft2d_fpga.cpp index 7bdaa05..59be121 100644 --- a/tests/test_fft2d_fpga.cpp +++ b/tests/test_fft2d_fpga.cpp @@ -25,15 +25,15 @@ TEST(fft2dFPGATest, InputValidityBRAM){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; // null inp ptr input - fft_time = fftfpgaf_c2c_2d_bram(64, NULL, test, 0); + fft_time = fftfpgaf_c2c_2d_bram(64, NULL, test, 0, 0); EXPECT_EQ(fft_time.valid, 0); // null out ptr input - fft_time = fftfpgaf_c2c_2d_bram(64, test, NULL, 0); + fft_time = fftfpgaf_c2c_2d_bram(64, test, NULL, 0, 0); EXPECT_EQ(fft_time.valid, 0); // if N not a power of 2 - fft_time = fftfpgaf_c2c_2d_bram(63, test, test, 0); + fft_time = fftfpgaf_c2c_2d_bram(63, test, test, 0, 0); EXPECT_EQ(fft_time.valid, 0); free(test); @@ -55,7 +55,7 @@ TEST(fft2dFPGATest, CorrectnessBRAM){ fftf_create_data(inp, N * N); - fft_time = fftfpgaf_c2c_2d_bram(N, inp, out, 0); + fft_time = fftfpgaf_c2c_2d_bram(N, inp, out, 0, 0); int result = verify_sp_fft2d_fftw(out, inp, N, 0); diff --git a/tests/test_fft3d_fpga.cpp b/tests/test_fft3d_fpga.cpp index 366cc73..7ef456c 100644 --- a/tests/test_fft3d_fpga.cpp +++ b/tests/test_fft3d_fpga.cpp @@ -25,15 +25,15 @@ TEST(fft3dFPGATest, InputValidityBRAM){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; // null inp ptr input - fft_time = fftfpgaf_c2c_3d_bram(64, 
NULL, test, 0); + fft_time = fftfpgaf_c2c_3d_bram(64, NULL, test, 0, 0); EXPECT_EQ(fft_time.valid, 0); // null out ptr input - fft_time = fftfpgaf_c2c_3d_bram(64, test, NULL, 0); + fft_time = fftfpgaf_c2c_3d_bram(64, test, NULL, 0, 0); EXPECT_EQ(fft_time.valid, 0); // if N not a power of 2 - fft_time = fftfpgaf_c2c_3d_bram(63, test, test, 0); + fft_time = fftfpgaf_c2c_3d_bram(63, test, test, 0, 0); EXPECT_EQ(fft_time.valid, 0); free(test); @@ -55,7 +55,7 @@ TEST(fft3dFPGATest, CorrectnessBRAM){ fftf_create_data(inp, N * N * N); - fft_time = fftfpgaf_c2c_3d_bram(N, inp, out, 0); + fft_time = fftfpgaf_c2c_3d_bram(N, inp, out, 0, 0); int result = verify_sp_fft3d_fftw(out, inp, N, 0); From dbc09a8a1ffdddfb0adf068769f48b092c70c579 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 23 Jun 2020 16:39:12 +0200 Subject: [PATCH 03/76] enabled fast emulation --- api/include/fftfpga/fftfpga.h | 2 +- api/src/fftfpga.c | 5 ++--- examples/fft1d.c | 14 +++++++++++--- examples/fft2d.c | 14 +++++++++++--- examples/fft3d.c | 17 +++++++++++++---- tests/test_fft1d_fpga.cpp | 2 +- tests/test_fft2d_fpga.cpp | 4 ++-- tests/test_fft3d_fpga.cpp | 4 ++-- tests/test_fft_setup.cpp | 8 ++++---- 9 files changed, 47 insertions(+), 23 deletions(-) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index d8a7bcc..1ebfc32 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -21,7 +21,7 @@ typedef struct fpga_timing { } fpga_t; // Initialize FPGA -extern int fpga_initialize(const char *platform_name, const char *path, int use_svm, int use_emulator); +extern int fpga_initialize(const char *platform_name, const char *path, int use_svm); // Finalize FPGA extern void fpga_final(); diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index ec056e9..e02b5b9 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -75,7 +75,6 @@ void* fftfpgaf_complex_malloc(size_t sz, int svm){ * @param platform name: string - name of the OpenCL platform * @param 
path : string - path to binary * @param use_svm : 1 if true 0 otherwise - * @param use_emulator : 1 if true 0 otherwise * @return 0 if successful -1 Path to binary missing -2 Unable to find platform passed as argument @@ -84,7 +83,7 @@ void* fftfpgaf_complex_malloc(size_t sz, int svm){ -5 Device does not support required SVM */ -int fpga_initialize(const char *platform_name, const char *path, int use_svm, int use_emulator){ +int fpga_initialize(const char *platform_name, const char *path, int use_svm){ cl_int status = 0; #ifdef VERBOSE @@ -686,7 +685,7 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl flagbuf1 = CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA; flagbuf2 = CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA; } - + // Device memory buffers cl_mem d_inData, d_outData; d_inData = clCreateBuffer(context, flagbuf1, sizeof(float2) * N * N * N, NULL, &status); diff --git a/examples/fft1d.c b/examples/fft1d.c index 111c530..5de4b02 100644 --- a/examples/fft1d.c +++ b/examples/fft1d.c @@ -21,8 +21,8 @@ int main(int argc, const char **argv) { char *path = "fft1d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; fpga_t timing = {0.0, 0.0, 0.0, 0}; - int use_svm = 0, use_emulator = 0; - bool status = true; + int use_svm = 0; + bool status = true, use_emulator = false; struct argparse_option options[] = { OPT_HELP(), @@ -34,6 +34,7 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), OPT_END(), }; @@ -44,8 +45,15 @@ int main(int argc, const char **argv) { // Print to console the configuration chosen to execute during runtime print_config(N, dim, iter, inv, sp, use_bram); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + } - int isInit = 
fpga_initialize(platform, path, use_svm, use_emulator); + int isInit = fpga_initialize(platform, path, use_svm); if(isInit != 0){ return EXIT_FAILURE; } diff --git a/examples/fft2d.c b/examples/fft2d.c index 1fb44fe..77458b5 100644 --- a/examples/fft2d.c +++ b/examples/fft2d.c @@ -22,10 +22,10 @@ int main(int argc, const char **argv) { char *path = "fft2d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; fpga_t timing = {0.0, 0.0, 0.0, 0}; - int use_svm = 0, use_emulator = 0; + int use_svm = 0; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; - bool status = true; + bool status = true, use_emulator = false; struct argparse_option options[] = { OPT_HELP(), @@ -38,6 +38,7 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), OPT_END(), }; @@ -48,8 +49,15 @@ int main(int argc, const char **argv) { // Print to console the configuration chosen to execute during runtime print_config(N, dim, iter, inv, sp, use_bram); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + } - int isInit = fpga_initialize(platform, path, use_svm, use_emulator); + int isInit = fpga_initialize(platform, path, use_svm); if(isInit != 0){ return EXIT_FAILURE; } diff --git a/examples/fft3d.c b/examples/fft3d.c index 61cdeb1..81e2fbe 100755 --- a/examples/fft3d.c +++ b/examples/fft3d.c @@ -20,12 +20,12 @@ static const char *const usage[] = { int main(int argc, const char **argv) { int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, interleaving = 0; char *path = "fft3d_emulate.aocx"; - const char *platform = "Intel(R) FPGA"; + const char *platform; fpga_t timing = {0.0, 0.0, 0.0, 0}; - int use_svm = 
0, use_emulator = 0; + int use_svm = 0; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; - bool status = true; + bool status = true, use_emulator = false; struct argparse_option options[] = { OPT_HELP(), @@ -38,6 +38,7 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), OPT_END(), }; @@ -48,9 +49,17 @@ int main(int argc, const char **argv) { // Print to console the configuration chosen to execute during runtime print_config(N, dim, iter, inv, sp, use_bram); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + } - int isInit = fpga_initialize(platform, path, use_svm, use_emulator); + int isInit = fpga_initialize(platform, path, use_svm); if(isInit != 0){ + fprintf(stderr, "FPGA initialization error\n"); return EXIT_FAILURE; } else if (isInit == -6){ diff --git a/tests/test_fft1d_fpga.cpp b/tests/test_fft1d_fpga.cpp index 822e7d3..b833c6d 100644 --- a/tests/test_fft1d_fpga.cpp +++ b/tests/test_fft1d_fpga.cpp @@ -51,7 +51,7 @@ TEST(fft1dFPGATest, CorrectnessSp){ // malloc data to input fftf_create_data(inp, N); - int isInit= fpga_initialize("Intel(R) FPGA", "emu_64_fft1d/fft1d.aocx", 0, 1); + int isInit= fpga_initialize("Intel(R) FPGA", "emu_64_fft1d/fft1d.aocx", 0); ASSERT_EQ(isInit, 0); fpga_t fft_time = fftfpgaf_c2c_1d(64, inp, out, 0, 1); diff --git a/tests/test_fft2d_fpga.cpp b/tests/test_fft2d_fpga.cpp index 59be121..be5e508 100644 --- a/tests/test_fft2d_fpga.cpp +++ b/tests/test_fft2d_fpga.cpp @@ -46,7 +46,7 @@ TEST(fft2dFPGATest, CorrectnessBRAM){ const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", 
"emu_64_fft2d_bram/fft2d_bram.aocx", 0, 1); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft2d_bram/fft2d_bram.aocx", 0); EXPECT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N; @@ -103,7 +103,7 @@ TEST(fftFPGATest, ValidSp2dFFTDDR){ const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft2d_ddr/fft2d_ddr.aocx", 0, 1); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft2d_ddr/fft2d_ddr.aocx", 0); ASSERT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N; diff --git a/tests/test_fft3d_fpga.cpp b/tests/test_fft3d_fpga.cpp index 7ef456c..60a843a 100644 --- a/tests/test_fft3d_fpga.cpp +++ b/tests/test_fft3d_fpga.cpp @@ -46,7 +46,7 @@ TEST(fft3dFPGATest, CorrectnessBRAM){ const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_bram/fft3d_bram.aocx", 0, 1); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_bram/fft3d_bram.aocx", 0); ASSERT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N * N; @@ -103,7 +103,7 @@ TEST(fftFPGATest, ValidSp3dFFTDDR){ const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr_triv/fft3d_ddr_triv.aocx", 0, 1); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr_triv/fft3d_ddr_triv.aocx", 0); ASSERT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N * N; diff --git a/tests/test_fft_setup.cpp b/tests/test_fft_setup.cpp index fe4057b..cd8e577 100644 --- a/tests/test_fft_setup.cpp +++ b/tests/test_fft_setup.cpp @@ -16,16 +16,16 @@ extern "C" { */ TEST(fftFPGASetupTest, ValidInit){ // empty path argument - EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "", 0, 1), -1); + EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "", 0), -1); // wrong platform name - EXPECT_EQ(fpga_initialize("TEST", "fft1d_emulate.aocx", 0, 1), -2); + EXPECT_EQ(fpga_initialize("TEST", "fft1d_emulate.aocx", 0), -2); // wrong path argument - 
EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "TEST", 0, 1), -4); + EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "TEST", 0), -4); // right path and platform names - EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_bram/fft3d_bram.aocx", 0, 1), 0); + EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_bram/fft3d_bram.aocx", 0), 0); fpga_final(); } From 6077dd6c9cd912e5deebaaa096ca09290b964a84 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 25 Jun 2020 18:04:52 +0200 Subject: [PATCH 04/76] Working svm host code --- .gitignore | 1 + api/src/aocl_mmd.h | 524 ----------------------------------------- api/src/fftfpga.c | 178 +++++++++----- api/src/opencl_utils.c | 7 + api/src/svm.c | 30 ++- examples/fft3d.c | 7 +- kernels/CMakeLists.txt | 8 + 7 files changed, 161 insertions(+), 594 deletions(-) delete mode 100644 api/src/aocl_mmd.h diff --git a/.gitignore b/.gitignore index 4844e8c..5b8529f 100755 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ fpgabitstream/ reports/ vscode/ scripts/ +build_svm/ tags *.DS_Store diff --git a/api/src/aocl_mmd.h b/api/src/aocl_mmd.h deleted file mode 100644 index 6adadca..0000000 --- a/api/src/aocl_mmd.h +++ /dev/null @@ -1,524 +0,0 @@ -#ifndef AOCL_MMD_H -#define AOCL_MMD_H - -/* (C) 1992-2019 Intel Corporation. */ -/* Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words */ -/* and logos are trademarks of Intel Corporation or its subsidiaries in the U.S. */ -/* and/or other countries. Other marks and brands may be claimed as the property */ -/* of others. 
See Trademarks on intel.com for full list of Intel trademarks or */ -/* the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera) */ -/* Your use of Intel Corporation's design tools, logic functions and other */ -/* software and tools, and its AMPP partner logic functions, and any output */ -/* files any of the foregoing (including device programming or simulation */ -/* files), and any associated documentation or information are expressly subject */ -/* to the terms and conditions of the Altera Program License Subscription */ -/* Agreement, Intel MegaCore Function License Agreement, or other applicable */ -/* license agreement, including, without limitation, that your use is for the */ -/* sole purpose of programming logic devices manufactured by Intel and sold by */ -/* Intel or its authorized distributors. Please refer to the applicable */ -/* agreement for further details. */ - - -#ifdef __cplusplus -extern "C" { -#endif - -/* Support for memory mapped ACL devices. - * - * Typical API lifecycle, from the perspective of the caller. - * - * 1. aocl_mmd_open must be called first, to provide a handle for further - * operations. - * - * 2. The interrupt and status handlers must be set. - * - * 3. Read and write operations are performed. - * - * 4. aocl_mmd_close may be called to shut down the device. No further - * operations are permitted until a subsequent aocl_mmd_open call. - * - * aocl_mmd_get_offline_info can be called anytime including before - * open. aocl_mmd_get_info can be called anytime between open and close. - */ - -#ifndef AOCL_MMD_CALL -#if defined(_WIN32) -#define AOCL_MMD_CALL __declspec(dllimport) -#else -#define AOCL_MMD_CALL -#endif -#endif - -#ifndef WEAK -#if defined(_WIN32) -#define WEAK -#else -#define WEAK __attribute__((weak)) -#endif -#endif - -/* The MMD API's version - the runtime expects this string when - * AOCL_MMD_VERSION is queried. 
This changes only if the API has changed */ -#define AOCL_MMD_VERSION_STRING "18.1" - -/* Memory types that can be supported - bitfield. Other than physical memory - * these types closely align with the OpenCL SVM types. - * - * AOCL_MMD_PHYSICAL_MEMORY - The vendor interface includes IP to communicate - * directly with physical memory such as DDR, QDR, etc. - * - * AOCL_MMD_SVM_COARSE_GRAIN_BUFFER - The vendor interface includes support for - * caching SVM pointer data andy requires explicit function calls from the user - * to sychronize the cache between the host processor and the FPGA. This level - * of SVM is not currently supported by Altera except as a subset of - * SVM_FINE_GAIN_SYSTEM support. - * - * AOCL_MMD_SVM_FINE_GRAIN_BUFFER - The vendor interface includes support for - * caching SVM pointer data and requires additional information from the user - * and/or host runtime that can be collected during pointer allocation in order - * to sychronize the cache between the host processor and the FPGA. Once this - * additional data is provided for an SVM pointer, the vendor interface handles - * cache synchronization between the host processor & the FPGA automatically. - * This level of SVM is not currently supported by Altera except as a subset - * of SVM_FINE_GRAIN_SYSTEM support. - * - * AOCL_MMD_SVM_FINE_GRAIN_SYSTEM - The vendor interface includes support for - * caching SVM pointer data and does not require any additional information to - * sychronize the cache between the host processor and the FPGA. The vendor - * interface handles cache synchronization between the host processor & the - * FPGA automatically for all SVM pointers. This level of SVM support is - * currently under development by Altera and some features may not be fully - * supported. 
- */ -#define AOCL_MMD_PHYSICAL_MEMORY (1 << 0) -#define AOCL_MMD_SVM_COARSE_GRAIN_BUFFER (1 << 1) -#define AOCL_MMD_SVM_FINE_GRAIN_BUFFER (1 << 2) -#define AOCL_MMD_SVM_FINE_GRAIN_SYSTEM (1 << 3) - -/* program modes - bitfield - * - * AOCL_MMD_PROGRAM_PRESERVE_GLOBAL_MEM - preserve contents of global memory - * when this bit is is set to 1. If programming can't occur without preserving - * global memory contents, the program function must fail, in which case the - * runtime may re-invoke program with this bit set to 0, allowing programming - * to occur even if doing so destroys global memory contents. - * - * more modes are reserved for stacking on in the future - */ -#define AOCL_MMD_PROGRAM_PRESERVE_GLOBAL_MEM (1 << 0) -typedef int aocl_mmd_program_mode_t; - -typedef void* aocl_mmd_op_t; - -typedef struct { - unsigned lo; /* 32 least significant bits of time value. */ - unsigned hi; /* 32 most significant bits of time value. */ -} aocl_mmd_timestamp_t; - - -/* Defines the set of characteristics that can be probed about the board before - * opening a device. The type of data returned by each is specified in - * parentheses in the adjacent comment. - * - * AOCL_MMD_NUM_BOARDS and AOCL_MMD_BOARD_NAMES - * These two fields can be used to implement multi-device support. The MMD - * layer may have a list of devices it is capable of interacting with, each - * identified with a unique name. The length of the list should be returned - * in AOCL_MMD_NUM_BOARDS, and the names of these devices returned in - * AOCL_MMD_BOARD_NAMES. The OpenCL runtime will try to call aocl_mmd_open - * for each board name returned in AOCL_MMD_BOARD_NAMES. 
- * - * */ -typedef enum { - AOCL_MMD_VERSION = 0, /* Version of MMD (char*)*/ - AOCL_MMD_NUM_BOARDS = 1, /* Number of candidate boards (int)*/ - AOCL_MMD_BOARD_NAMES = 2, /* Names of boards available delimiter=; (char*)*/ - AOCL_MMD_VENDOR_NAME = 3, /* Name of vendor (char*) */ - AOCL_MMD_VENDOR_ID = 4, /* An integer ID for the vendor (int) */ - AOCL_MMD_USES_YIELD = 5, /* 1 if yield must be called to poll hw (int) */ - /* The following can be combined in a bit field: - * AOCL_MMD_PHYSICAL_MEMORY, AOCL_MMD_SVM_COARSE_GRAIN_BUFFER, AOCL_MMD_SVM_FINE_GRAIN_BUFFER, AOCL_MMD_SVM_FINE_GRAIN_SYSTEM. - * Prior to 14.1, all existing devices supported physical memory and no types of SVM memory, so this - * is the default when this operation returns '0' for board MMDs with a version prior to 14.1 - */ - AOCL_MMD_MEM_TYPES_SUPPORTED = 6, -} aocl_mmd_offline_info_t; - -/* Defines the set of characteristics that can be probed about the board after - * opening a device. This can involve communication to the device - * - * AOCL_MMD_NUM_KERNEL_INTERFACES - The number of kernel interfaces, usually 1 - * - * AOCL_MMD_KERNEL_INTERFACES - the handle for each kernel interface. - * param_value will have size AOCL_MMD_NUM_KERNEL_INTERFACES * sizeof int - * - * AOCL_MMD_PLL_INTERFACES - the handle for each pll associated with each - * kernel interface. 
If a kernel interface is not clocked by acl_kernel_clk - * then return -1 - * - * */ -typedef enum { - AOCL_MMD_NUM_KERNEL_INTERFACES = 1, /* Number of Kernel interfaces (int) */ - AOCL_MMD_KERNEL_INTERFACES = 2, /* Kernel interface (int*) */ - AOCL_MMD_PLL_INTERFACES = 3, /* Kernel clk handles (int*) */ - AOCL_MMD_MEMORY_INTERFACE = 4, /* Global memory handle (int) */ - AOCL_MMD_TEMPERATURE = 5, /* Temperature measurement (float) */ - AOCL_MMD_PCIE_INFO = 6, /* PCIe information (char*) */ - AOCL_MMD_BOARD_NAME = 7, /* Name of board (char*) */ - AOCL_MMD_BOARD_UNIQUE_ID = 8, /* Unique ID of board (int) */ - AOCL_MMD_POWER = 9, /* Power usage of board (Watts) (float) */ - AOCL_MMD_NALLA_DIAGNOSTIC = 10, /* Board specific diagnostics. */ - AOCL_MMD_DEVICE_POWER = 11, /* Power usage of device (Watts) (float) */ - AOCL_MMD_MAC0_ADDRESS = 12, /* MAC address access of modules */ - AOCL_MMD_MAC1_ADDRESS = 13, - AOCL_MMD_MAC2_ADDRESS = 14, - AOCL_MMD_MAC3_ADDRESS = 15, - AOCL_MMD_SCH0_STATUS = 16, - AOCL_MMD_SCH1_STATUS = 17, - AOCL_MMD_SCH2_STATUS = 18, - AOCL_MMD_SCH3_STATUS = 19, - AOCL_MMD_PR_ID = 20, - AOCL_MMD_CONCURRENT_READS = 21, /* # of parallel reads; 1 is serial*/ - AOCL_MMD_CONCURRENT_WRITES = 22, /* # of parallel writes; 1 is serial*/ - AOCL_MMD_CONCURRENT_READS_OR_WRITES = 23, /* total # of concurent operations read + writes*/ - AOCL_MMD_QSFP0_INFO = 24, /*Gets QSFP0 specific info and also setups LR modules if required */ - AOCL_MMD_QSFP1_INFO = 25, /*Gets QSFP1 specific info and also setups LR modules if required */ - AOCL_MMD_QSFP2_INFO = 26, /*Gets QSFP2 specific info and also setups LR modules if required */ - AOCL_MMD_QSFP3_INFO = 27 /*Gets QSFP3 specific info and also setups LR modules if required */ -} aocl_mmd_info_t; - - -typedef struct { - unsigned long long int exception_type; - void *user_private_info; - size_t user_cb; -}aocl_mmd_interrupt_info; - -typedef void (*aocl_mmd_interrupt_handler_fn)( int handle, void* user_data ); -typedef void 
(*aocl_mmd_device_interrupt_handler_fn)( int handle, aocl_mmd_interrupt_info* data_in, void* user_data ); -typedef void (*aocl_mmd_status_handler_fn)( int handle, void* user_data, aocl_mmd_op_t op, int status ); - - -/* Get information about the board using the enum aocl_mmd_offline_info_t for - * offline info (called without a handle), and the enum aocl_mmd_info_t for - * info specific to a certain board. - * Arguments: - * - * requested_info_id - a value from the aocl_mmd_offline_info_t enum - * - * param_value_size - size of the param_value field in bytes. This should - * match the size of the return type expected as indicated in the enum - * definition. For example, the AOCL_MMD_TEMPERATURE returns a float, so - * the param_value_size should be set to sizeof(float) and you should - * expect the same number of bytes returned in param_size_ret. - * - * param_value - pointer to the variable that will receive the returned info - * - * param_size_ret - receives the number of bytes of data actually returned - * - * Returns: a negative value to indicate error. 
- */ -AOCL_MMD_CALL int aocl_mmd_get_offline_info( - aocl_mmd_offline_info_t requested_info_id, - size_t param_value_size, - void* param_value, - size_t* param_size_ret ) WEAK; - -AOCL_MMD_CALL int aocl_mmd_get_info( - int handle, - aocl_mmd_info_t requested_info_id, - size_t param_value_size, - void* param_value, - size_t* param_size_ret ) WEAK; - -AOCL_MMD_CALL int aocl_mmd_card_info( - const char * device_name, - aocl_mmd_info_t requested_info_id, - size_t param_value_size, - void* param_value, - size_t* param_size_ret ); - -/*HPC Serial channel status and control functions for access via extention function pointer access in opencl */ -AOCL_MMD_CALL int aocl_mmd_sch_status (const char * device_name, size_t channel_number, unsigned int* param_value); -AOCL_MMD_CALL int aocl_mmd_sch_ctrl (const char * device_name, size_t channel_number, unsigned int param_value); -AOCL_MMD_CALL int aocl_mmd_sch_perfctrl (const char * device_name, size_t channel_number, unsigned int param_value); -AOCL_MMD_CALL int aocl_mmd_sch_rxperf (const char * device_name, size_t channel_number, unsigned int* param_value); -AOCL_MMD_CALL int aocl_mmd_sch_txperf (const char * device_name, size_t channel_number, unsigned int* param_value); - -/* Open and initialize the named device. - * - * The name is typically one specified by the AOCL_MMD_BOARD_NAMES offline - * info. - * - * Arguments: - * name - open the board with this name (provided as a C-style string, - * i.e. NUL terminated ASCII.) - * - * Returns: the non-negative integer handle for the board, otherwise a - * negative value to indicate error. Upon receiving the error, the OpenCL - * runtime will proceed to open other known devices, hence the MMD mustn't - * exit the application if an open call fails. - */ -AOCL_MMD_CALL int aocl_mmd_open(const char *name) WEAK; - -/* Close an opened device, by its handle. - * Returns: 0 on success, negative values on error. 
- */ -AOCL_MMD_CALL int aocl_mmd_close(int handle) WEAK; - -/* Set the interrupt handler for the opened device. - * The interrupt handler is called whenever the client needs to be notified - * of an asynchronous event signalled by the device internals. - * For example, the kernel has completed or is stalled. - * - * Important: Interrupts from the kernel must be ignored until this handler is - * set - * - * Arguments: - * fn - the callback function to invoke when a kernel interrupt occurs - * user_data - the data that should be passed to fn when it is called. - * - * Returns: 0 if successful, negative on error - */ -AOCL_MMD_CALL int aocl_mmd_set_interrupt_handler( int handle, aocl_mmd_interrupt_handler_fn fn, void* user_data ) WEAK; - -/* Set the device interrupt handler for the opened device. - * The device interrupt handler is called whenever the client needs to be notified - * of a device event signalled by the device internals. - * For example, an ECC error has been reported. - * - * Important: Interrupts from the device must be ignored until this handler is - * set - * - * Arguments: - * fn - the callback function to invoke when a device interrupt occurs - * user_data - the data that should be passed to fn when it is called. - * - * Returns: 0 if successful, negative on error - */ -AOCL_MMD_CALL int aocl_mmd_set_device_interrupt_handler( int handle, aocl_mmd_device_interrupt_handler_fn fn, void* user_data ) WEAK; - -/* Set the operation status handler for the opened device. - * The operation status handler is called with - * status 0 when the operation has completed successfully. - * status negative when the operation completed with errors. - * - * Arguments: - * fn - the callback function to invoke when a status update is to be - * performed. - * user_data - the data that should be passed to fn when it is called. 
- * - * Returns: 0 if successful, negative on error - */ -AOCL_MMD_CALL int aocl_mmd_set_status_handler( int handle, aocl_mmd_status_handler_fn fn, void* user_data ) WEAK; - -/* If AOCL_MMD_USES_YIELD is 1, this function is called when the host is idle - * and hence possibly waiting for events to be processed by the device. - * If AOCL_MMD_USES_YIELD is 0, this function is never called and the MMD is - * assumed to provide status/event updates via some other execution thread - * such as through an interrupt handler. - * - * Returns: non-zero if the yield function performed useful work such as - * processing DMA transactions, 0 if there is no useful work to be performed - * - * NOTE: yield may be called continuously as long as it reports that it has useful work - */ -AOCL_MMD_CALL int aocl_mmd_yield(int handle) WEAK; - -/* Read, write and copy operations on a single interface. - * If op is NULL - * - Then these calls must block until the operation is complete. - * - The status handler is not called for this operation. - * - * If op is non-NULL, then: - * - These may be non-blocking calls - * - The status handler must be called upon completion, with status 0 - * for success, and a negative value for failure. - * - * Arguments: - * op - the operation object used to track this operations progress - * - * len - the size in bytes to transfer - * - * src - the host buffer being read from - * - * dst - the host buffer being written to - * - * mmd_interface - the handle to the interface being accessed. E.g. To - * access global memory this handle will be whatever is returned by - * aocl_mmd_get_info when called with AOCL_MMD_MEMORY_INTERFACE. - * - * offset/src_offset/dst_offset - the byte offset within the interface that - * the transfer will begin at. - * - * The return value is 0 if the operation launch was successful, and - * negative otherwise. 
- */ -AOCL_MMD_CALL int aocl_mmd_read( - int handle, - aocl_mmd_op_t op, - size_t len, - void* dst, - int mmd_interface, size_t offset ) WEAK; -AOCL_MMD_CALL int aocl_mmd_write( - int handle, - aocl_mmd_op_t op, - size_t len, - const void* src, - int mmd_interface, size_t offset ) WEAK; -AOCL_MMD_CALL int aocl_mmd_copy( - int handle, - aocl_mmd_op_t op, - size_t len, - int mmd_interface, size_t src_offset, size_t dst_offset ) WEAK; - -/* Host Channel create operation - * Opens channel between host and kernel. - * - * Arguments: - * channel_name - name of channel to initialize. Same name as used in board_spec.xml - * - * queue_depth - the size in bytes of pinned memory queue in system memory - * - * direction - the direction of the channel - * - * The return value is negative if initialization was unsuccessful, and - * positive otherwise. Positive return value is handle to the channel to be used for - * subsequent calls for the channel. - */ -AOCL_MMD_CALL int aocl_mmd_hostchannel_create( - int handle, - char *channel_name, - size_t queue_depth, - int direction) WEAK; - -/* Host Channel destroy operation - * Closes channel between host and kernel. - * - * Arguments: - * channel - the handle to the channel to close, that was obtained with - * create channel - * - * The return value is 0 if the destroy was successful, and negative - * otherwise. - */ -AOCL_MMD_CALL int aocl_mmd_hostchannel_destroy( - int handle, - int channel) WEAK; - -/* Host Channel get buffer operation - * Provide host with pointer to buffer they can access to to write or - * read from kernel, along with space or data available in the buffer - * in bytes. - * - * Arguments: - * channel - the handle to the channel to get the buffer for - * - * buffer_size - the address that this call will write the amount of - * space or data that's available in the buffer, - * depending on direction of the channel, in bytes - * - * status - the address that this call will write to for result of this - * call. 
Value will be 0 for success, and negative otherwise - * - * The return value is the pointer to the buffer that host can write - * to or read from. NULL if the status is negative. - */ -AOCL_MMD_CALL void *aocl_mmd_hostchannel_get_buffer( - int handle, - int channel, - size_t *buffer_size, - int *status) WEAK; - -/* Host Channel acknowledge buffer operation - * Acknowledge to the channel that the user has written or read data from - * it. This will make the data or additional buffer space available to - * write to or read from kernel. - * - * Arguments: - * channel - the handle to the channel that user is acknowledging - * - * send_size - the size in bytes that the user is acknowledging - * - * status - the address that this call will write to for result of this - * call. Value will be 0 for success, and negative otherwise - * - * The return value is equal to send_size if send_size was less than or - * equal to the buffer_size from get buffer call. If send_size was - * greater, then return value is the amount that was actually sent. - */ -AOCL_MMD_CALL size_t aocl_mmd_hostchannel_ack_buffer( - int handle, - int channel, - size_t send_size, - int *status) WEAK; - -/* Program the device - * - * The host will guarantee that no operations are currently executing on the - * device. That means the kernels will be idle and no read/write/copy - * commands are active. Interrupts should be disabled and the FPGA should - * be reprogrammed with the data from user_data which has size size. The host - * will then call aocl_mmd_set_status_handler and aocl_mmd_set_interrupt_handler - * again. At this point interrupts can be enabled. - * - * The new handle to the board after reprogram does not have to be the same as - * the one before. - * - * Arguments: - * user_data - The binary contents of the fpga.bin file created during - * Quartus II compilation. - * size - the size in bytes of user_data - * program_mode - bit field for programming attributes. 
See - * aocl_mmd_program_mode_t definition - * - * Returns: the new non-negative integer handle for the board, otherwise a - * negative value to indicate error. - */ -AOCL_MMD_CALL int aocl_mmd_program( int handle, void * user_data, size_t size, aocl_mmd_program_mode_t program_mode) WEAK; - - -/* Shared memory allocator - * Allocates memory that is shared between the host and the FPGA. The - * host will access this memory using the pointer returned by - * aocl_mmd_shared_mem_alloc, while the FPGA will access the shared memory - * using device_ptr_out. If shared memory is not supported this should return - * NULL. - * - * Shared memory survives FPGA reprogramming if the CPU is not rebooted. - * - * Arguments: - * size - the size of the shared memory to allocate - * device_ptr_out - will receive the pointer value used by the FPGA (the device) - * to access the shared memory. Cannot be NULL. The type is - * unsigned long long to handle the case where the host has a - * smaller pointer size than the device. - * - * Returns: The pointer value to be used by the host to access the shared - * memory if successful, otherwise NULL. - */ -AOCL_MMD_CALL void * aocl_mmd_shared_mem_alloc( int handle, size_t size, unsigned long long *device_ptr_out ) WEAK; - -/* Shared memory de-allocator - * Frees previously allocated shared memory. If shared memory is not supported, - * this function should do nothing. - * - * Arguments: - * host_ptr - the host pointer that points to the shared memory, as returned by - * aocl_mmd_shared_mem_alloc - * size - the size of the shared memory to free. Must match the size - * originally passed to aocl_mmd_shared_mem_alloc - */ -AOCL_MMD_CALL void aocl_mmd_shared_mem_free ( int handle, void* host_ptr, size_t size ) WEAK; - -/* DEPRECATED. 
Use aocl_mmd_program instead - * This reprogram API is only for mmd version previous than 18.1 -*/ -AOCL_MMD_CALL int aocl_mmd_reprogram( int handle, void * user_data, size_t size) WEAK; - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index e02b5b9..dd1d6fb 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -57,9 +57,12 @@ void* fftfpga_complex_malloc(size_t sz, int svm){ * @return void ptr or NULL */ void* fftfpgaf_complex_malloc(size_t sz, int svm){ + /* if(svm == 1){ - fprintf(stderr, "Working in progress\n"); - return NULL; + return (float2 *)clSVMAlloc(context, CL_MEM_READ_WRITE, sz, 0); + + //fprintf(stderr, "Working in progress\n"); + //return NULL; // return aocl_mmd_shared_mem_alloc(svm_handle, sizeof(double2) * sz, inData, device_ptr); } else if(sz == 0){ @@ -68,6 +71,8 @@ void* fftfpgaf_complex_malloc(size_t sz, int svm){ else{ return ((float2 *)alignedMalloc(sz)); } + */ + return ((float2 *)alignedMalloc(sz)); } /** @@ -114,13 +119,14 @@ int fpga_initialize(const char *platform_name, const char *path, int use_svm){ device = devices[0]; if(use_svm){ + //svm_enabled = 1; + if(!check_valid_svm_device(device)){ return -5; } else{ printf("Supports SVM \n"); svm_enabled = 1; - return -6; } } @@ -799,11 +805,6 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl */ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_kernel ffta_kernel = NULL, fftb_kernel = NULL, fftc_kernel = NULL; - cl_kernel fetch1_kernel = NULL, fetch2_kernel = NULL; - cl_kernel transpose_kernel = NULL; - cl_kernel store1_kernel = NULL, store2_kernel = NULL; - cl_int status = 0; int num_pts = N * N * N; @@ -816,6 +817,29 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); #endif + // Can't pass bool to device, so convert it to int + int inverse_int = inv; + + // Setup kernels + cl_kernel fetch1_kernel = clCreateKernel(program, "fetch1", &status); + checkError(status, "Failed to create fetch1 kernel"); + cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose kernel"); + cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + cl_kernel store1_kernel = clCreateKernel(program, "store1", &status); + checkError(status, "Failed to create store1 kernel"); + + cl_kernel fetch2_kernel = clCreateKernel(program, "fetch2", &status); + checkError(status, "Failed to create fetch2 kernel"); + cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + cl_kernel store2_kernel = clCreateKernel(program, "store2", &status); + checkError(status, "Failed to create store2 kernel"); + + // Setup Queues to the kernels queue_setup(); // Device memory buffers @@ -825,53 +849,78 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); + float2 *h_inData, *h_outData; + // allocate SVM buffers + // Required outside the if stm so that compiler doesn't warm about uninitialized variables + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) 
* num_pts, inp, 0, NULL, NULL); + if(svm_enabled){ - status = clFinish(queue1); - checkError(status, "failed to finish"); + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_inData[i].x = inp[i].x; + h_inData[i].y = inp[i].y; + } - // Can't pass bool to device, so convert it to int - int inverse_int = inv; + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + + // kernel stores to DDR memory + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set store1 kernel arg"); + + // kernel fetches from DDR memory + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + + // kernel stores using SVM based PCIe to host + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData); + checkError(status, "Failed to set store2 kernel arg"); + } + else{ + // Copy data from host to device + fft_time.pcie_write_t = getTimeinMilliSec(); - fetch1_kernel = clCreateKernel(program, "fetch1", 
&status); - checkError(status, "Failed to create fetch1 kernel"); - ffta_kernel = clCreateKernel(program, "fft3da", &status); - checkError(status, "Failed to create fft3da kernel"); - transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose kernel"); - fftb_kernel = clCreateKernel(program, "fft3db", &status); - checkError(status, "Failed to create fft3db kernel"); - store1_kernel = clCreateKernel(program, "store1", &status); - checkError(status, "Failed to create store1 kernel"); + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); - fetch2_kernel = clCreateKernel(program, "fetch2", &status); - checkError(status, "Failed to create fetch2 kernel"); - fftc_kernel = clCreateKernel(program, "fft3dc", &status); - checkError(status, "Failed to create fft3dc kernel"); - store2_kernel = clCreateKernel(program, "store2", &status); - checkError(status, "Failed to create store2 kernel"); + status = clFinish(queue1); + checkError(status, "failed to finish"); - status = clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set fetch1 kernel arg"); - status = clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - status = clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); - status = clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set store1 kernel arg"); + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; + checkError(status, "Failed to copy data to device"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + checkError(status, "Failed to set fetch1 kernel arg"); - status = clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set 
fetch2 kernel arg"); - status = clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - status = clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set store2 kernel arg"); + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + checkError(status, "Failed to set store2 kernel arg"); + } fft_time.exec_t = getTimeinMilliSec(); status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); @@ -918,18 +967,39 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { checkError(status, "failed to finish"); fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - // Copy results from device to host - fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); + if(svm_enabled){ + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + for(size_t i = 0; i < num_pts; i++){ + out[i].x = h_outData[i].x; + out[i].y = 
h_outData[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); + + } + else{ + // Copy results from device to host + fft_time.pcie_read_t = getTimeinMilliSec(); + status = clEnqueueReadBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; + checkError(status, "Failed to copy data from device"); + } queue_cleanup(); if (d_inData) - clReleaseMemObject(d_inData); + clReleaseMemObject(d_inData); if (d_outData) - clReleaseMemObject(d_outData); + clReleaseMemObject(d_outData); if(fetch1_kernel) clReleaseKernel(fetch1_kernel); diff --git a/api/src/opencl_utils.c b/api/src/opencl_utils.c index 705246b..98a17b7 100755 --- a/api/src/opencl_utils.c +++ b/api/src/opencl_utils.c @@ -50,6 +50,10 @@ cl_platform_id findPlatform(const char *platform_name){ char name_search[pl_len + 1]; // VLA tolowercase(platform_name, name_search); +#ifndef NDEBUG + printf("Num of Platforms found - %d\n", num_platforms); +#endif + // Search the platforms for the platform name passed as argument for(int i = 0; i < num_platforms; i++){ // Get the size of the platform name referred to by the id @@ -71,6 +75,9 @@ cl_platform_id findPlatform(const char *platform_name){ } tolowercase(plat_name, plat_name_lc); +#ifndef NDEBUG + printf(" %d - %s \n", i, plat_name_lc); +#endif if( strstr(plat_name_lc, name_search)){ cl_platform_id pid = pids[i]; free(pids); diff --git a/api/src/svm.c b/api/src/svm.c index 05986f5..76f316e 100644 --- a/api/src/svm.c +++ b/api/src/svm.c @@ -2,7 +2,7 @@ #include #include #include "CL/opencl.h" -#include "aocl_mmd.h" +//#include "aocl_mmd.h" #include "svm.h" #include "opencl_utils.h" @@ -24,31 +24,37 @@ int replace(){ bool check_valid_svm_device(cl_device_id device){ cl_device_svm_capabilities caps = 
0; cl_int status; + size_t sz_return; status = clGetDeviceInfo( device, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, - 0 + &sz_return ); checkError(status, "Failed to get device info"); - - if(caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER){ + printf("SVM capabilities: %lu, size %lu \n", caps, sz_return); + + if (caps && CL_DEVICE_SVM_COARSE_GRAIN_BUFFER){ + return true; + } + else if(caps && CL_DEVICE_SVM_FINE_GRAIN_BUFFER){ fprintf(stderr, "Found CL_DEVICE_SVM_FINE_GRAIN_BUFFER. API support in progress\n"); - return false; + return true; } - else if(caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM){ + else if((caps && CL_DEVICE_SVM_FINE_GRAIN_BUFFER) && (caps &&CL_DEVICE_SVM_ATOMICS)){ + fprintf(stderr, "Found CL_DEVICE_SVM_FINE_GRAIN_BUFFER with support for CL_DEVICE_SVM_ATOMICS. API support in progress\n"); + return true; + } + else if(caps && CL_DEVICE_SVM_FINE_GRAIN_SYSTEM){ fprintf(stderr, "Found CL_DEVICE_SVM_FINE_GRAIN_SYSTEM. API support in progress\n"); return false; } - else if(caps & CL_DEVICE_SVM_ATOMICS){ - fprintf(stderr, "Found CL_DEVICE_SVM_ATOMICS. API support in progress\n"); + else if((caps && CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) && (caps &&CL_DEVICE_SVM_ATOMICS)){ + fprintf(stderr, "Found CL_DEVICE_SVM_FINE_GRAIN_SYSTEM with support for CL_DEVICE_SVM_ATOMICS. 
API support in progress\n"); return false; } - else if (caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER){ - return true; - } else{ fprintf(stderr, "No SVM Support found!"); return false; @@ -167,4 +173,4 @@ bool check_valid_svm_device(cl_device_id device){ } return 1; } - */ \ No newline at end of file + */ diff --git a/examples/fft3d.c b/examples/fft3d.c index 81e2fbe..e26a64e 100755 --- a/examples/fft3d.c +++ b/examples/fft3d.c @@ -52,9 +52,11 @@ int main(int argc, const char **argv) { if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; } else{ platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; } int isInit = fpga_initialize(platform, path, use_svm); @@ -62,10 +64,6 @@ int main(int argc, const char **argv) { fprintf(stderr, "FPGA initialization error\n"); return EXIT_FAILURE; } - else if (isInit == -6){ - printf("SVM Found \n"); - return EXIT_SUCCESS; - } if(sp == 0){ printf("Not implemented. Work in Progress\n"); @@ -81,6 +79,7 @@ int main(int argc, const char **argv) { status = fftf_create_data(inp, N * N * N); if(!status){ + fprintf(stderr, "Error in Data Creation \n"); free(inp); free(out); return EXIT_FAILURE; diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index 8a93552..5fcfb31 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -11,12 +11,20 @@ set(LOG_POINTS 3 CACHE STRING "Log of per sample data points") math(EXPR POINTS "1 << ${LOG_POINTS}") set(LOG_FFT_SIZE 6 CACHE STRING "Log of points of FFT") +set_property(CACHE LOG_FFT_SIZE PROPERTY STRINGS 4 5 6 7 8 9) math(EXPR FFT_SIZE "1 << ${LOG_FFT_SIZE}") message("-- FFT size is ${FFT_SIZE}") +#set(BUF_LOC "DDR") +set(BUFFER_LOCATION "DDR" CACHE STRING "Buffer location of 3d Transpose") +set_property(CACHE BUFFER_LOCATION PROPERTY STRINGS "DDR" "device") + +message("-- Buffer location for 3d Transpose is ${BUFFER_LOCATION}") + configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/common/fft_config.h.in" 
"${CMAKE_BINARY_DIR}/kernels/common/fft_config.h" + ESCAPE_QUOTES ) ## From 302844c369049b0dea6afb04ebfa0861a5b5cfb1 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 25 Jun 2020 18:12:48 +0200 Subject: [PATCH 05/76] malloc zero size check --- api/src/fftfpga.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index dd1d6fb..1a18d09 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -72,6 +72,9 @@ void* fftfpgaf_complex_malloc(size_t sz, int svm){ return ((float2 *)alignedMalloc(sz)); } */ + if(sz == 0){ + return NULL; + } return ((float2 *)alignedMalloc(sz)); } From d2003e5b88b45b236215162d4e8e55dc6a2249b3 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 26 Jun 2020 10:15:18 +0200 Subject: [PATCH 06/76] Buffer locations attribute in fft3d kernel --- kernels/common/fft_config.h.in | 2 ++ kernels/fft3d/fft3d_ddr_triv.cl | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/kernels/common/fft_config.h.in b/kernels/common/fft_config.h.in index ea3ac43..1602c0d 100755 --- a/kernels/common/fft_config.h.in +++ b/kernels/common/fft_config.h.in @@ -9,6 +9,8 @@ #define LOGN @LOG_FFT_SIZE@ #define FFT_SIZE @FFT_SIZE@ +#define BUFFER_LOCATION "@BUFFER_LOCATION@" + #endif // FFT_CONFIG_H diff --git a/kernels/fft3d/fft3d_ddr_triv.cl b/kernels/fft3d/fft3d_ddr_triv.cl index 9d8a137..c90ab83 100755 --- a/kernels/fft3d/fft3d_ddr_triv.cl +++ b/kernels/fft3d/fft3d_ddr_triv.cl @@ -27,7 +27,7 @@ int bit_reversed(int x, int bits) { } // Kernel that fetches data from global memory -kernel void fetch1(global volatile float2 * restrict src) { +kernel void fetch1(global volatile float2 * restrict src1) { const unsigned N = (1 << LOGN); for(unsigned k = 0; k < (N * N); k++){ @@ -35,7 +35,7 @@ kernel void fetch1(global volatile float2 * restrict src) { #pragma unroll 8 for(unsigned i = 0; i < N; i++){ - buf[i & ((1< Date: Mon, 29 Jun 2020 22:03:08 +0200 Subject: [PATCH 07/76] Doxygen CMake, 
removed svm option in data alloc --- api/CMakeLists.txt | 24 ++++++ api/include/fftfpga/fftfpga.h | 121 ++++++++++++++++++++++----- api/src/fftfpga.c | 41 ++++----- api/src/opencl_utils.c | 2 +- examples/fft1d.c | 4 +- examples/fft2d.c | 4 +- examples/fft3d.c | 5 +- kernels/cmake/genKernelTargets.cmake | 2 +- tests/test_fft1d_fpga.cpp | 6 +- tests/test_fft2d_fpga.cpp | 8 +- tests/test_fft3d_fpga.cpp | 8 +- tests/test_fft_setup.cpp | 8 +- tests/test_misc.cpp | 4 +- 13 files changed, 166 insertions(+), 71 deletions(-) diff --git a/api/CMakeLists.txt b/api/CMakeLists.txt index 9654fab..05f975e 100755 --- a/api/CMakeLists.txt +++ b/api/CMakeLists.txt @@ -27,3 +27,27 @@ target_include_directories(${PROJECT_NAME} target_link_libraries(${PROJECT_NAME} PUBLIC ${IntelFPGAOpenCL_LIBRARIES} m) + +## +# Doxygen Build +## + +find_package(Doxygen) +if(DOXYGEN_FOUND) + + # Doxygen Options + set(DOXYGEN_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/docs) + set(DOXYGEN_PROJECT_NAME "FFTFPGA") + set(DOXYGEN_PROJECT_BRIEF "OpenCL based FFT library for FPGAs") + set(DOXYGEN_GENERATE_LATEX YES) + set(DOXYGEN_OPTIMIZE_OUTPUT_FOR_C YES) + set(DOXYGEN_SHOW_FILES YES) + + doxygen_add_docs(doc_doxygen + ${PROJECT_SOURCE_DIR}/include/fftfpga/fftfpga.h + COMMENT "Generate library documentation" + ) + +else (DOXYGEN_FOUND) + message("Doxygen need to be installed to generate the doxygen documentation") +endif (DOXYGEN_FOUND) \ No newline at end of file diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 1ebfc32..41ad948 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -1,53 +1,134 @@ // Author: Arjun Ramaswami +/** + * @file fftfpga.h + * @brief Header file that provides APIs for OpenCL Host code + */ + #ifndef FFTFPGA_H #define FFTFPGA_H +/** + * Single Precision Complex Floating Point Data Structure + */ typedef struct { - float x; - float y; + float x; /**< real value */ + float y; /**< imaginary value */ } float2; +/** + * Double 
Precision Complex Floating Point Data Structure + */ typedef struct { - double x; - double y; + double x; /**< real value */ + double y; /**< imaginary value */ } double2; +/** + * Record time in milliseconds of different FPGA runtime stages + */ typedef struct fpga_timing { - double pcie_read_t; - double pcie_write_t; - double exec_t; - int valid; + double pcie_read_t; /**< Time to read from DDR to host using PCIe bus */ + double pcie_write_t; /**< Time to write from DDR to host using PCIe bus */ + double exec_t; /**< Kernel execution time */ + int valid; /**< Represents 1 signifying valid execution */ } fpga_t; -// Initialize FPGA +/** + * @brief Initialize FPGA + * @param platform_name: name of the OpenCL platform + * @param path : path to binary + * @param use_svm : 1 if true 0 otherwise + * @return 0 if successful + -1 Path to binary missing + -2 Unable to find platform passed as argument + -3 Unable to find devices for given OpenCL platform + -4 Failed to create program, file not found in path + -5 Device does not support required SVM + */ extern int fpga_initialize(const char *platform_name, const char *path, int use_svm); -// Finalize FPGA +/** + * @brief Release FPGA Resources + */ extern void fpga_final(); -// Double precision complex memory allocation -extern void* fftfpga_complex_malloc(size_t sz, int svm); +/** + * @brief Allocate memory of double precision complex floating points + * @param sz : size_t - size to allocate + * @return void ptr or NULL + */ +extern void* fftfpga_complex_malloc(size_t sz); -// Single precision complex memory allocation -extern void* fftfpgaf_complex_malloc(size_t sz, int svm); +/** + * @brief Allocate memory of single precision complex floating points + * @param sz : size_t : size to allocate + * @return void ptr or NULL + */ +extern void* fftfpgaf_complex_malloc(size_t sz); -// Double Precision 1d FFT +/** + * @brief compute an out-of-place double precision complex 1D-FFT on the FPGA + * @param N : integer pointer to 
size of FFT3d + * @param inp : double2 pointer to input data of size N + * @param out : double2 pointer to output data of size N + * @param inv : int toggle to activate backward FFT + * @param iter : number of iterations of the N point FFT + * @return fpga_t : time taken in milliseconds for data transfers and execution + */ extern fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int iter); -// Single Precision 1d FFT +/** + * @brief compute an out-of-place single precision complex 1D-FFT on the FPGA + * @param N : integer pointer to size of FFT3d + * @param inp : float2 pointer to input data of size N + * @param out : float2 pointer to output data of size N + * @param inv : int toggle to activate backward FFT + * @param iter : number of iterations of the N point FFT + * @return fpga_t : time taken in milliseconds for data transfers and execution + */ extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter); -// Single Precision 2d FFT using BRAM +/** + * @brief compute an out-of-place single precision complex 2D-FFT using the BRAM of the FPGA + * @param N : integer pointer to size of FFT2d + * @param inp : float2 pointer to input data of size [N * N] + * @param out : float2 pointer to output data of size [N * N] + * @param inv : int toggle to activate backward FFT + * @param interleaving : 1 if interleaved global memory buffers + * @return fpga_t : time taken in milliseconds for data transfers and execution + */ extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving); -// Single Precision 2d FFT using DDR +/** + * @brief compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA + * @param N : integer pointer to size of FFT2d + * @param inp : float2 pointer to input data of size [N * N] + * @param out : float2 pointer to output data of size [N * N] + * @param inv : int toggle to activate backward FFT + * @return fpga_t : time taken in milliseconds for data 
transfers and execution + */ extern fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv); -// Single Precision in BRAM 3d FFT +/** + * @brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA + * @param N : integer pointer addressing the size of FFT3d + * @param inp : float2 pointer to input data of size [N * N * N] + * @param out : float2 pointer to output data of size [N * N * N] + * @param inv : int toggle to activate backward FFT + * @param interleaving : 1 if using burst interleaved global memory buffers + * @return fpga_t : time taken in milliseconds for data transfers and execution + */ extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving); -// Single Precision in DDR 3d FFT +/** + * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA + * @param N : integer pointer addressing the size of FFT3d + * @param inp : float2 pointer to input data of size [N * N * N] + * @param out : float2 pointer to output data of size [N * N * N] + * @param inv : int toggle to activate backward FFT + * @return fpga_t : time taken in milliseconds for data transfers and execution + */ extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv); #endif diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 1a18d09..3c01348 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -36,13 +36,8 @@ void queue_cleanup(); * @param svm : 1 if svm * @return void ptr or NULL */ -void* fftfpga_complex_malloc(size_t sz, int svm){ - if(svm == 1){ - fprintf(stderr, "Working in progress\n"); - return NULL; - // return aocl_mmd_shared_mem_alloc(svm_handle, sizeof(double2) * sz, inData, device_ptr); - } - else if(sz == 0){ +void* fftfpga_complex_malloc(size_t sz){ + if(sz == 0){ return NULL; } else{ @@ -56,22 +51,8 @@ void* fftfpga_complex_malloc(size_t sz, int svm){ * @param svm : 1 if svm * @return void ptr or NULL */ -void* 
fftfpgaf_complex_malloc(size_t sz, int svm){ - /* - if(svm == 1){ - return (float2 *)clSVMAlloc(context, CL_MEM_READ_WRITE, sz, 0); +void* fftfpgaf_complex_malloc(size_t sz){ - //fprintf(stderr, "Working in progress\n"); - //return NULL; - // return aocl_mmd_shared_mem_alloc(svm_handle, sizeof(double2) * sz, inData, device_ptr); - } - else if(sz == 0){ - return NULL; - } - else{ - return ((float2 *)alignedMalloc(sz)); - } - */ if(sz == 0){ return NULL; } @@ -122,8 +103,6 @@ int fpga_initialize(const char *platform_name, const char *path, int use_svm){ device = devices[0]; if(use_svm){ - //svm_enabled = 1; - if(!check_valid_svm_device(device)){ return -5; } @@ -810,6 +789,20 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; + + /* + const char* board_name; + int *bytes; + aocl_mmd_offline_info_t info_id; + info_id = AOCL_MMD_BOARD_NAMES; + aocl_mmd_get_offline_info(info_id, sizeof(char*), &board_name, size_t(int)); + + svm_handle = aocl_mmd_open(board_name); + if(svm_handle < 0 ){ + return NULL; + } + return aocl_mmd_shared_mem_alloc(svm_handle, sz, inData, device_ptr); + */ // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ diff --git a/api/src/opencl_utils.c b/api/src/opencl_utils.c index 98a17b7..38e3e78 100755 --- a/api/src/opencl_utils.c +++ b/api/src/opencl_utils.c @@ -207,7 +207,7 @@ static size_t loadBinary(const char *binary_path, char **buf){ * \return pointer to allocated memory on successful allocation otherwise NULL */ void* alignedMalloc(size_t size){ - const unsigned OPENCL_ALIGNMENT = 64; + size_t OPENCL_ALIGNMENT = 64; void *memptr = NULL; int ret = posix_memalign(&memptr, OPENCL_ALIGNMENT, size); if (ret != 0){ diff --git a/examples/fft1d.c b/examples/fft1d.c index 5de4b02..4899369 100644 --- a/examples/fft1d.c +++ b/examples/fft1d.c @@ -66,8 +66,8 @@ int main(int argc, const char **argv) { else{ size_t inp_sz = 
sizeof(float2) * N * iter; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz, use_svm); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz, use_svm); + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); status = fftf_create_data(inp, N * iter); if(!status){ diff --git a/examples/fft2d.c b/examples/fft2d.c index 77458b5..3505cba 100644 --- a/examples/fft2d.c +++ b/examples/fft2d.c @@ -70,8 +70,8 @@ int main(int argc, const char **argv) { for(size_t i = 0; i < iter; i++){ size_t inp_sz = sizeof(float2) * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz, use_svm); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz, use_svm); + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); status = fftf_create_data(inp, N * N); if(!status){ diff --git a/examples/fft3d.c b/examples/fft3d.c index e26a64e..880e1ad 100755 --- a/examples/fft3d.c +++ b/examples/fft3d.c @@ -74,10 +74,11 @@ int main(int argc, const char **argv) { // create and destroy data every iteration size_t inp_sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz, use_svm); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz, use_svm); + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); status = fftf_create_data(inp, N * N * N); + printf("\n\n"); if(!status){ fprintf(stderr, "Error in Data Creation \n"); free(inp); diff --git a/kernels/cmake/genKernelTargets.cmake b/kernels/cmake/genKernelTargets.cmake index 25f70de..0154bcc 100644 --- a/kernels/cmake/genKernelTargets.cmake +++ b/kernels/cmake/genKernelTargets.cmake @@ -58,7 +58,7 @@ function(gen_fft_targets) add_custom_target(${kernel_fname}_syn DEPENDS ${SYN_BSTREAM} ${CL_SRC} ${CL_HEADER} COMMENT - "Building a report for ${kernel_fname} to folder ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" + "Synthesizing for 
${kernel_fname} using ${FPGA_BOARD_NAME} to folder ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" ) endforeach() diff --git a/tests/test_fft1d_fpga.cpp b/tests/test_fft1d_fpga.cpp index b833c6d..7a5db7a 100644 --- a/tests/test_fft1d_fpga.cpp +++ b/tests/test_fft1d_fpga.cpp @@ -45,8 +45,8 @@ TEST(fft1dFPGATest, CorrectnessSp){ int N = (1 << logN); size_t sz = sizeof(float2) * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz, 0); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz, 0); + float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(sz); // malloc data to input fftf_create_data(inp, N); @@ -60,7 +60,7 @@ TEST(fft1dFPGATest, CorrectnessSp){ fftwf_complex* fftw_out = (fftwf_complex*)fftwf_alloc_complex(sz); fftwf_plan plan = fftwf_plan_dft_1d( N, &fftw_inp[0], &fftw_out[0], FFTW_FORWARD, FFTW_ESTIMATE); - float2 *temp = (float2 *)fftfpgaf_complex_malloc(sz, 0); + float2 *temp = (float2 *)fftfpgaf_complex_malloc(sz); for (int i = 0; i < N; i++){ temp[i] = out[i]; diff --git a/tests/test_fft2d_fpga.cpp b/tests/test_fft2d_fpga.cpp index be5e508..18cab06 100644 --- a/tests/test_fft2d_fpga.cpp +++ b/tests/test_fft2d_fpga.cpp @@ -50,8 +50,8 @@ TEST(fft2dFPGATest, CorrectnessBRAM){ EXPECT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz, 0); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz, 0); + float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(sz); fftf_create_data(inp, N * N); @@ -107,8 +107,8 @@ TEST(fftFPGATest, ValidSp2dFFTDDR){ ASSERT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz, 0); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz, 0); + float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(sz); fftf_create_data(inp, N * N); diff --git a/tests/test_fft3d_fpga.cpp b/tests/test_fft3d_fpga.cpp index 
60a843a..5901a6d 100644 --- a/tests/test_fft3d_fpga.cpp +++ b/tests/test_fft3d_fpga.cpp @@ -50,8 +50,8 @@ TEST(fft3dFPGATest, CorrectnessBRAM){ ASSERT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz, 0); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz, 0); + float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(sz); fftf_create_data(inp, N * N * N); @@ -107,8 +107,8 @@ TEST(fftFPGATest, ValidSp3dFFTDDR){ ASSERT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz, 0); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz, 0); + float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(sz); fftf_create_data(inp, N * N * N); diff --git a/tests/test_fft_setup.cpp b/tests/test_fft_setup.cpp index cd8e577..2f0ab91 100644 --- a/tests/test_fft_setup.cpp +++ b/tests/test_fft_setup.cpp @@ -34,9 +34,7 @@ TEST(fftFPGASetupTest, ValidInit){ */ TEST(fftFPGASetupTest, ValidDpMalloc){ // request zero size - EXPECT_EQ(fftfpga_complex_malloc(0, 0), nullptr); - // TODO: do not support svm - EXPECT_EQ(fftfpga_complex_malloc(0, 1), nullptr); + EXPECT_EQ(fftfpga_complex_malloc(0), nullptr); } /** @@ -44,7 +42,5 @@ TEST(fftFPGASetupTest, ValidDpMalloc){ */ TEST(fftFPGASetupTest, ValidSpMalloc){ // request zero size - EXPECT_EQ(fftfpgaf_complex_malloc(0, 0), nullptr); - // TODO: do not support svm - EXPECT_EQ(fftfpgaf_complex_malloc(0, 1), nullptr); + EXPECT_EQ(fftfpgaf_complex_malloc(0), nullptr); } \ No newline at end of file diff --git a/tests/test_misc.cpp b/tests/test_misc.cpp index 9587325..eea37df 100644 --- a/tests/test_misc.cpp +++ b/tests/test_misc.cpp @@ -14,7 +14,7 @@ extern "C" { TEST(HelperTest, CreateValidRandomSpData){ int N = 8; size_t sz = sizeof(float2) * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz, 0); + float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); 
// sz 0 EXPECT_FALSE(fftf_create_data(0, 1)); @@ -31,7 +31,7 @@ TEST(HelperTest, CreateValidRandomSpData){ TEST(HelperTest, CreateValidRandomDpData){ int N = 8; size_t sz = sizeof(double2) * N; - double2 *inp = (double2*)fftfpga_complex_malloc(sz, 0); + double2 *inp = (double2*)fftfpga_complex_malloc(sz); // sz 0 EXPECT_FALSE(fft_create_data(0, 1)); From 9bd3b0842d129e8313c2887d46b1a122524b803c Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 30 Jun 2020 22:24:09 +0200 Subject: [PATCH 08/76] Build doxygen only on option --- api/CMakeLists.txt | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/api/CMakeLists.txt b/api/CMakeLists.txt index 05f975e..645749d 100755 --- a/api/CMakeLists.txt +++ b/api/CMakeLists.txt @@ -31,23 +31,27 @@ target_link_libraries(${PROJECT_NAME} ## # Doxygen Build ## +option(BUILD_DOC "Build documentation" OFF) -find_package(Doxygen) -if(DOXYGEN_FOUND) - - # Doxygen Options - set(DOXYGEN_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/docs) - set(DOXYGEN_PROJECT_NAME "FFTFPGA") - set(DOXYGEN_PROJECT_BRIEF "OpenCL based FFT library for FPGAs") - set(DOXYGEN_GENERATE_LATEX YES) - set(DOXYGEN_OPTIMIZE_OUTPUT_FOR_C YES) - set(DOXYGEN_SHOW_FILES YES) - - doxygen_add_docs(doc_doxygen - ${PROJECT_SOURCE_DIR}/include/fftfpga/fftfpga.h - COMMENT "Generate library documentation" - ) - -else (DOXYGEN_FOUND) - message("Doxygen need to be installed to generate the doxygen documentation") -endif (DOXYGEN_FOUND) \ No newline at end of file +if(BUILD_DOC) + + find_package(Doxygen) + if(DOXYGEN_FOUND) + + # Doxygen Options + set(DOXYGEN_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/docs) + set(DOXYGEN_PROJECT_NAME "FFTFPGA") + set(DOXYGEN_PROJECT_BRIEF "OpenCL based FFT library for FPGAs") + set(DOXYGEN_GENERATE_LATEX YES) + set(DOXYGEN_OPTIMIZE_OUTPUT_FOR_C YES) + set(DOXYGEN_SHOW_FILES YES) + + doxygen_add_docs(doc_doxygen + ${PROJECT_SOURCE_DIR}/include/fftfpga/fftfpga.h + COMMENT "Generate library 
documentation" + ) + + else (DOXYGEN_FOUND) + message(WARNING, "Doxygen need to be installed to generate the doxygen documentation") + endif (DOXYGEN_FOUND) +endif() \ No newline at end of file From 683cb211e9ad7ad65a84879023999da87064da1f Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 1 Jul 2020 22:34:21 +0200 Subject: [PATCH 09/76] test simple batch svm execution two svm fft3d calls within one call --- .gitignore | 1 + api/include/fftfpga/fftfpga.h | 3 + api/src/fftfpga.c | 294 ++++++++++++++++++++++++++++++++++ examples/CMakeLists.txt | 2 +- examples/fft3d_test.c | 149 +++++++++++++++++ 5 files changed, 448 insertions(+), 1 deletion(-) create mode 100755 examples/fft3d_test.c diff --git a/.gitignore b/.gitignore index 5b8529f..ddafe42 100755 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ reports/ vscode/ scripts/ build_svm/ +svm_build/ tags *.DS_Store diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 41ad948..bec9ab4 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -131,4 +131,7 @@ extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int */ extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv); +extern fpga_t fftfpgaf_c2c_3d_ddr_test(int N, float2 *inp, float2 *out, int inv); + + #endif diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 3c01348..98789a9 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -22,6 +22,7 @@ static cl_context context = NULL; static cl_program program = NULL; static cl_command_queue queue1 = NULL, queue2 = NULL, queue3 = NULL; static cl_command_queue queue4 = NULL, queue5 = NULL, queue6 = NULL; +static cl_command_queue queue7 = NULL, queue8 = NULL; //static int svm_handle; static int svm_enabled = 0; @@ -1021,6 +1022,291 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { return fft_time; } +fpga_t fftfpgaf_c2c_3d_ddr_test(int N, float2 *inp, float2 *out, int inv) { + fpga_t 
fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = N * N * N; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 3d FFT transform in DDR \n", inv ? " inverse":""); +#endif + + // Can't pass bool to device, so convert it to int + int inverse_int = inv; + + // Setup kernels + cl_kernel fetch1_kernel = clCreateKernel(program, "fetch1", &status); + checkError(status, "Failed to create fetch1 kernel"); + cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose kernel"); + cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + cl_kernel store1_kernel = clCreateKernel(program, "store1", &status); + checkError(status, "Failed to create store1 kernel"); + + cl_kernel fetch2_kernel = clCreateKernel(program, "fetch2", &status); + checkError(status, "Failed to create fetch2 kernel"); + cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + cl_kernel store2_kernel = clCreateKernel(program, "store2", &status); + checkError(status, "Failed to create store2 kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // Device memory buffers + cl_mem d_outData; + d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + cl_mem d_outData_2; + d_outData_2 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + float2 *h_inData, *h_outData; + // allocate SVM buffers + // 
Required outside the if statement so that the compiler doesn't warn about uninitialized variables + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + + float2 *h_inData_2, *h_outData_2; + h_inData_2 = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + h_outData_2 = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + + if(svm_enabled){ + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_inData[i].x = inp[i].x; + h_inData[i].y = inp[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData_2, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + size_t stride = num_pts; + // copy the second batch of data into h_inData_2 + for(size_t i = 0; i < num_pts; i++){ + h_inData_2[i].x = inp[stride + i].x; + h_inData_2[i].y = inp[stride + i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData_2, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + /* + * kernel arguments + */ + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + + // kernel stores to DDR memory + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void
*)&d_outData); + checkError(status, "Failed to set store1 kernel arg"); + + // kernel fetches from DDR memory + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + + // kernel stores using SVM based PCIe to host + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData); + checkError(status, "Failed to set store2 kernel arg"); + + /* + * First iter arg set + * Start first half of first batch's iter + */ + + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + // Wait for all command queues to complete pending events + status = clFinish(queue1); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + + // Second Batch: Set first iter of second batch + // Change only the SVM and ddr pointers + status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData_2); + checkError(status, "Failed to set fetch1 kernel arg"); + status=clSetKernelArg(store1_kernel, 0, 
sizeof(cl_mem), (void *)&d_outData_2); + checkError(status, "Failed to set store1 kernel arg"); + + // Start second batch first iter + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + // first batch second iter + status = clEnqueueTask(queue6, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue7, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue8, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clFinish(queue1); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue7); + checkError(status, "failed to finish"); + status = clFinish(queue8); + checkError(status, "failed to finish"); + + // second batch second iter + // kernel fetches from DDR memory + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_2); + checkError(status, "Failed to set fetch2 kernel arg"); + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData_2); + 
checkError(status, "Failed to set store2 kernel arg"); + + status = clEnqueueTask(queue6, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue7, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue8, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue7); + checkError(status, "failed to finish"); + status = clFinish(queue8); + checkError(status, "failed to finish"); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + for(size_t i = 0; i < num_pts; i++){ + out[i].x = h_outData[i].x; + out[i].y = h_outData[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + + status = clEnqueueSVMMap(queue2, CL_TRUE, CL_MAP_READ, + (void *)h_outData_2, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + for(size_t i = 0; i < num_pts; i++){ + out[stride + i].x = h_outData_2[i].x; + out[stride + i].y = h_outData_2[i].y; + } + + status = clEnqueueSVMUnmap(queue2, (void *)h_outData_2, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); + + if (h_inData_2) + clSVMFree(context, h_inData_2); + if (h_outData_2) + clSVMFree(context, h_outData_2); + + } + + queue_cleanup(); + + if (d_outData) + clReleaseMemObject(d_outData); + if (d_outData_2) + clReleaseMemObject(d_outData_2); + + if(fetch1_kernel) + clReleaseKernel(fetch1_kernel); + if(fetch2_kernel) + clReleaseKernel(fetch2_kernel); + + if(ffta_kernel) + 
clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(fftc_kernel) + clReleaseKernel(fftc_kernel); + + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + + if(store1_kernel) + clReleaseKernel(store1_kernel); + if(store2_kernel) + clReleaseKernel(store2_kernel); + + fft_time.valid = 1; + return fft_time; +} + /** * \brief Create a command queue for each kernel @@ -1040,6 +1326,10 @@ void queue_setup(){ checkError(status, "Failed to create command queue5"); queue6 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); checkError(status, "Failed to create command queue6"); + queue7 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue6"); + queue8 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue6"); } /** @@ -1058,4 +1348,8 @@ void queue_cleanup() { clReleaseCommandQueue(queue5); if(queue6) clReleaseCommandQueue(queue6); + if(queue7) + clReleaseCommandQueue(queue7); + if(queue8) + clReleaseCommandQueue(queue8); } diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index de891af..1dc068e 100755 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,7 +4,7 @@ project(examplesfftfpga VERSION 0.1 DESCRIPTION "Example Code that uses libfftfpga" LANGUAGES C CXX) -set(examples fft3d fft2d fft1d) +set(examples fft3d fft2d fft1d fft3d_test) # create a target for each of the example foreach(example ${examples}) diff --git a/examples/fft3d_test.c b/examples/fft3d_test.c new file mode 100755 index 0000000..45e5ec8 --- /dev/null +++ b/examples/fft3d_test.c @@ -0,0 +1,149 @@ +// Author: Arjun Ramaswami + +#include +#include // EXIT_FAILURE +#include +#include + +#include "CL/opencl.h" +#include "fftfpga/fftfpga.h" + +#include "argparse.h" +#include "helper.h" +#include "verify_fftw.h" + +static const char *const usage[] = { + "bin/host 
[options]", + NULL, +}; + +int main(int argc, const char **argv) { + int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, interleaving = 0; + char *path = "fft3d_emulate.aocx"; + const char *platform; + fpga_t timing = {0.0, 0.0, 0.0, 0}; + int use_svm = 0; + double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double temp_timer = 0.0, total_api_time = 0.0; + bool status = true, use_emulator = false; + + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic Options"), + OPT_INTEGER('n',"n", &N, "FFT Points"), + OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), + OPT_INTEGER('i',"iter", &iter, "Iterations"), + OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), + OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), + OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), + OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), + OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); + argc = argparse_parse(&argparse, argc, argv); + + // Print to console the configuration chosen to execute during runtime + print_config(N, dim, iter, inv, sp, use_bram); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + + int isInit = fpga_initialize(platform, path, use_svm); + if(isInit != 0){ + fprintf(stderr, "FPGA initialization error\n"); + return EXIT_FAILURE; + } + + if(sp == 0){ + printf("Not implemented. 
Work in Progress\n"); + return EXIT_SUCCESS; + } + else{ + for(size_t i = 0; i < iter; i++){ + + // create and destroy data every iteration + size_t inp_sz = sizeof(float2) * N * N * N * 2; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + status = fftf_create_data(inp, N * N * N * 2); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + // use ddr for 3d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_3d_ddr_test(N, inp, out, inv); + total_api_time += getTimeinMilliseconds() - temp_timer; + +#ifdef USE_FFTW + + if(!verify_sp_fft3d_fftw(out, inp, N, inv)){ + fprintf(stderr, "3d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + size_t inp_test_sz = sizeof(float2) * N * N * N; + float2 *inp_test = (float2*)fftfpgaf_complex_malloc(inp_test_sz); + float2 *out_test = (float2*)fftfpgaf_complex_malloc(inp_test_sz); + + size_t str = N * N * N; + for(size_t i = 0; i < (N*N*N); i++){ + inp_test[i].x = inp[str + i].x; + inp_test[i].y = inp[str + i].y; + + out_test[i].x = out[str + i].x; + out_test[i].y = out[str + i].y; + } + + if(!verify_sp_fft3d_fftw(out_test, inp_test, N, inv)){ + fprintf(stderr, "3d FFT Verification Failed for second batch\n"); + free(inp_test); + free(out_test); + return EXIT_FAILURE; + } +#endif + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + // destroy FFT input and output + free(inp); + free(out); + + free(inp_test); + free(out_test); + } // iter + } // sp condition + + // destroy fpga state + fpga_final(); + + // display performance measures + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, inv, sp); + + return EXIT_SUCCESS; +} From 
dc58b888c44660cc2d7602621fa5d78ea4f0c38d Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 1 Jul 2020 23:05:52 +0200 Subject: [PATCH 10/76] Profile synthesis, fix without fftw --- examples/fft3d_test.c | 4 ++-- kernels/cmake/genKernelTargets.cmake | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/examples/fft3d_test.c b/examples/fft3d_test.c index 45e5ec8..6900e92 100755 --- a/examples/fft3d_test.c +++ b/examples/fft3d_test.c @@ -118,6 +118,8 @@ int main(int argc, const char **argv) { free(out_test); return EXIT_FAILURE; } + free(inp_test); + free(out_test); #endif if(timing.valid == 0){ fprintf(stderr, "Invalid execution, timing found to be 0"); @@ -134,8 +136,6 @@ int main(int argc, const char **argv) { free(inp); free(out); - free(inp_test); - free(out_test); } // iter } // sp condition diff --git a/kernels/cmake/genKernelTargets.cmake b/kernels/cmake/genKernelTargets.cmake index 0154bcc..78deba0 100644 --- a/kernels/cmake/genKernelTargets.cmake +++ b/kernels/cmake/genKernelTargets.cmake @@ -20,6 +20,8 @@ function(gen_fft_targets) "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/emu_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocx") set(REP_BSTREAM "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/rep_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocr") + set(PROF_BSTREAM + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/prof_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocx") set(SYN_BSTREAM "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/syn_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocx") @@ -49,6 +51,18 @@ function(gen_fft_targets) "Building a report for ${kernel_fname} to folder ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" ) + # Profile Target + add_custom_command(OUTPUT ${PROF_BSTREAM} + COMMAND ${IntelFPGAOpenCL_AOC} ${CL_SRC} ${CL_INCL_DIR} ${AOC_FLAGS} ${PROF_FLAGS} -board=${FPGA_BOARD_NAME} -o ${PROF_BSTREAM} + MAIN_DEPENDENCY ${CL_SRC} + ) + + add_custom_target(${kernel_fname}_profile + DEPENDS ${PROF_BSTREAM} ${CL_SRC} ${CL_HEADER} + COMMENT + "Profiling for ${kernel_fname} 
using ${FPGA_BOARD_NAME} to folder ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" + ) + # Synthesis Target add_custom_command(OUTPUT ${SYN_BSTREAM} COMMAND ${IntelFPGAOpenCL_AOC} ${CL_SRC} ${CL_INCL_DIR} ${AOC_FLAGS} -board=${FPGA_BOARD_NAME} -o ${SYN_BSTREAM} From a60c417955bb70d0b330e7c54c1dcc5df9e0181c Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 3 Jul 2020 15:54:39 +0200 Subject: [PATCH 11/76] sq: general batch --- api/include/fftfpga/fftfpga.h | 2 +- api/src/fftfpga.c | 275 +++++++++++++++++----------------- examples/common/verify_fftw.c | 6 +- examples/common/verify_fftw.h | 2 +- examples/fft3d.c | 2 +- examples/fft3d_test.c | 15 +- tests/test_fft3d_fpga.cpp | 74 ++++++++- 7 files changed, 223 insertions(+), 153 deletions(-) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index bec9ab4..fbc9542 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -131,7 +131,7 @@ extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int */ extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv); -extern fpga_t fftfpgaf_c2c_3d_ddr_test(int N, float2 *inp, float2 *out, int inv); +extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, int how_many); #endif diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 98789a9..42dda8b 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -1022,13 +1022,17 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { return fft_time; } -fpga_t fftfpgaf_c2c_3d_ddr_test(int N, float2 *inp, float2 *out, int inv) { +fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, int how_many) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 0)){ + return fft_time; + } + + if(!svm_enabled){ 
return fft_time; } @@ -1061,119 +1065,115 @@ fpga_t fftfpgaf_c2c_3d_ddr_test(int N, float2 *inp, float2 *out, int inv) { // Setup Queues to the kernels queue_setup(); - // Device memory buffers - cl_mem d_outData; - d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + // Device memory buffers: double buffers + cl_mem d_outData_0; + d_outData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - cl_mem d_outData_2; - d_outData_2 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_outData_1; + d_outData_1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - float2 *h_inData, *h_outData; - // allocate SVM buffers - // Required outside the if stm so that compiler doesn't warm about uninitialized variables - h_inData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - h_outData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); - - float2 *h_inData_2, *h_outData_2; - h_inData_2 = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - h_outData_2 = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + // allocate and initialize SVM buffers - if(svm_enabled){ + float2 *h_inData[how_many], *h_outData[how_many]; + for(size_t i = 0; i < how_many; i++){ + h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + h_outData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + status = 
clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData[i], sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_inData[i].x = inp[i].x; - h_inData[i].y = inp[i].y; - } + size_t stride = i * num_pts; + for(size_t j = 0; j < num_pts; j++){ + h_inData[i][j].x = inp[stride + j].x; + h_inData[i][j].y = inp[stride + j].y; - status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData_2, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - size_t stride = num_pts; - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_inData_2[i].x = inp[stride + i].x; - h_inData_2[i].y = inp[stride + i].y; + //printf("%lu: %lu: (%f, %f)\n", i, j, h_inData[i][j].x, h_inData[i][j].y); } - status = clEnqueueSVMUnmap(queue1, (void *)h_inData_2, 0, NULL, NULL); + status = clEnqueueSVMUnmap(queue1, (void *)h_inData[i], 0, NULL, NULL); checkError(status, "Failed to unmap input data"); + } + printf("Transferred data to SVM buffers \n"); - /* - * kernel arguments - */ - // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); - checkError(status, "Failed to set fetch1 kernel arg"); - - status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); + /* + * kernel arguments + */ + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData[0]); + checkError(status, "Failed to set fetch1 kernel arg"); - // kernel stores to DDR memory - status=clSetKernelArg(store1_kernel, 0, 
sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); - // kernel fetches from DDR memory - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set fetch2 kernel arg"); - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); + // kernel stores to DDR memory + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); + checkError(status, "Failed to set store1 kernel arg"); - // kernel stores using SVM based PCIe to host - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData); - checkError(status, "Failed to set store2 kernel arg"); + // kernel fetches from DDR memory + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); - /* - * First iter arg set - * Start first half of first batch's iter - */ + // kernel stores using SVM based PCIe to host + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[0]); + checkError(status, "Failed to set store2 kernel arg"); - fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + /* + * First batch write phase + */ + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); 
- checkError(status, "Failed to launch fft kernel"); + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); - // Wait for all command queues to complete pending events - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); + // Wait for all command queues to complete pending events + status = clFinish(queue1); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + + printf("First iter write phase complete \n"); + for(size_t i = 1; i < how_many; i++){ - // Second Batch: Set first iter of second batch - // Change only the SVM and ddr pointers - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void 
*)h_inData_2); + /* + * write phase of current iteration + */ + // change write phase host and ddr ptrs + status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData[i]); checkError(status, "Failed to set fetch1 kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_2); - checkError(status, "Failed to set store1 kernel arg"); + if(i % 2 == 1){ + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); + checkError(status, "Failed to set store1 kernel arg"); + } + else{ + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); + checkError(status, "Failed to set store1 kernel arg"); + } - // Start second batch first iter + // Start write phase of current iteration status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); @@ -1189,7 +1189,9 @@ fpga_t fftfpgaf_c2c_3d_ddr_test(int N, float2 *inp, float2 *out, int inv) { status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second transpose kernel"); - // first batch second iter + /* + * Read phase of previous iteration + */ status = clEnqueueTask(queue6, fetch2_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); @@ -1216,72 +1218,68 @@ fpga_t fftfpgaf_c2c_3d_ddr_test(int N, float2 *inp, float2 *out, int inv) { status = clFinish(queue8); checkError(status, "failed to finish"); - // second batch second iter - // kernel fetches from DDR memory - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_2); - checkError(status, "Failed to set fetch2 kernel arg"); - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData_2); + // Change read phase ptrs to current iteration + if( (i % 2) == 1){ + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); + checkError(status, "Failed to set fetch2 kernel arg"); + } + else{ + status=clSetKernelArg(fetch2_kernel, 
0, sizeof(cl_mem), (void *)&d_outData_0); + checkError(status, "Failed to set fetch2 kernel arg"); + } + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[i]); checkError(status, "Failed to set store2 kernel arg"); + } + + printf("Read phase left \n"); + status = clEnqueueTask(queue6, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); - status = clEnqueueTask(queue6, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue7, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue8, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clFinish(queue6); - checkError(status, "failed to finish"); - status = clFinish(queue7); - checkError(status, "failed to finish"); - status = clFinish(queue8); - checkError(status, "failed to finish"); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + status = clEnqueueTask(queue7, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, - (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map out data"); + status = clEnqueueTask(queue8, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); - for(size_t i = 0; i < num_pts; i++){ - out[i].x = h_outData[i].x; - out[i].y = h_outData[i].y; - } + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue7); + checkError(status, "failed to finish"); + status = clFinish(queue8); + checkError(status, "failed to finish"); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap out data"); + for(size_t i = 0; i < how_many; i++){ status = 
clEnqueueSVMMap(queue2, CL_TRUE, CL_MAP_READ, - (void *)h_outData_2, sizeof(float2) * num_pts, 0, NULL, NULL); + (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map out data"); - for(size_t i = 0; i < num_pts; i++){ - out[stride + i].x = h_outData_2[i].x; - out[stride + i].y = h_outData_2[i].y; + size_t stride = how_many * num_pts; + for(size_t j = 0; j < num_pts; j++){ + out[stride + j].x = h_outData[i][j].x; + out[stride + j].y = h_outData[i][j].y; + printf("%lu: %lu: (%f, %f)\n", i, j, out[stride+j].x, out[stride+j].y); } - status = clEnqueueSVMUnmap(queue2, (void *)h_outData_2, 0, NULL, NULL); + status = clEnqueueSVMUnmap(queue2, (void *)h_outData[i], 0, NULL, NULL); checkError(status, "Failed to unmap out data"); + } - if (h_inData) - clSVMFree(context, h_inData); - if (h_outData) - clSVMFree(context, h_outData); - - if (h_inData_2) - clSVMFree(context, h_inData_2); - if (h_outData_2) - clSVMFree(context, h_outData_2); - + printf("written to SVM phase left \n"); + for(size_t i = 0; i < how_many; i++){ + clSVMFree(context, h_inData[i]); + clSVMFree(context, h_outData[i]); } + printf("Freed SVM buffer \n"); queue_cleanup(); - if (d_outData) - clReleaseMemObject(d_outData); - if (d_outData_2) - clReleaseMemObject(d_outData_2); + printf("Freeing ddr buffer\n"); + if (d_outData_0) + clReleaseMemObject(d_outData_0); + if (d_outData_1) + clReleaseMemObject(d_outData_1); if(fetch1_kernel) clReleaseKernel(fetch1_kernel); @@ -1303,6 +1301,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_test(int N, float2 *inp, float2 *out, int inv) { if(store2_kernel) clReleaseKernel(store2_kernel); + printf("Freed all kernels\n"); fft_time.valid = 1; return fft_time; } diff --git a/examples/common/verify_fftw.c b/examples/common/verify_fftw.c index 2b22218..1e268d2 100644 --- a/examples/common/verify_fftw.c +++ b/examples/common/verify_fftw.c @@ -14,7 +14,7 @@ * \param inverse: 1 if inverse * \return true if verification passed */ -bool 
verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse){ +bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse, int how_many){ // Copy inp data to verify using FFTW // requires allocating data specifically for FFTW computation @@ -54,7 +54,7 @@ bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse){ mag_sum += magnitude; noise_sum += noise; #ifndef NDEBUG - printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); + //printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); #endif } @@ -126,7 +126,7 @@ bool verify_sp_fft2d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse){ mag_sum += magnitude; noise_sum += noise; #ifndef NDEBUG - printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); + //printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); #endif } diff --git a/examples/common/verify_fftw.h b/examples/common/verify_fftw.h index 9ce7cae..76896c9 100644 --- a/examples/common/verify_fftw.h +++ b/examples/common/verify_fftw.h @@ -5,6 +5,6 @@ int verify_sp_fft2d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse); -int verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse); +int verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse, int how_many); #endif // FFT3D_FFTW_H \ No newline at end of file diff --git a/examples/fft3d.c b/examples/fft3d.c index 880e1ad..db27261 100755 --- a/examples/fft3d.c +++ b/examples/fft3d.c @@ -100,7 +100,7 @@ int main(int argc, const char **argv) { } #ifdef USE_FFTW - if(!verify_sp_fft3d_fftw(out, inp, N, inv)){ + if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ fprintf(stderr, "3d FFT Verification Failed \n"); free(inp); free(out); diff --git a/examples/fft3d_test.c b/examples/fft3d_test.c index 
6900e92..7aa712b 100755 --- a/examples/fft3d_test.c +++ b/examples/fft3d_test.c @@ -18,7 +18,7 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, interleaving = 0; + int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, interleaving = 0, batch = 1; char *path = "fft3d_emulate.aocx"; const char *platform; fpga_t timing = {0.0, 0.0, 0.0, 0}; @@ -35,6 +35,7 @@ int main(int argc, const char **argv) { OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), + OPT_BOOLEAN('c',"batch", &batch, "Batch"), OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), @@ -73,11 +74,11 @@ int main(int argc, const char **argv) { for(size_t i = 0; i < iter; i++){ // create and destroy data every iteration - size_t inp_sz = sizeof(float2) * N * N * N * 2; + size_t inp_sz = sizeof(float2) * N * N * N * batch; float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - status = fftf_create_data(inp, N * N * N * 2); + status = fftf_create_data(inp, N * N * N * batch); if(!status){ fprintf(stderr, "Error in Data Creation \n"); free(inp); @@ -87,12 +88,12 @@ int main(int argc, const char **argv) { // use ddr for 3d Transpose temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr_test(N, inp, out, inv); + timing = fftfpgaf_c2c_3d_ddr_svm_batch(N, inp, out, inv, batch); total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW - - if(!verify_sp_fft3d_fftw(out, inp, N, inv)){ + printf("FFTW Validation\n"); + if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ fprintf(stderr, "3d FFT Verification Failed \n"); free(inp); free(out); @@ -112,7 +113,7 @@ int main(int argc, const char **argv) { 
out_test[i].y = out[str + i].y; } - if(!verify_sp_fft3d_fftw(out_test, inp_test, N, inv)){ + if(!verify_sp_fft3d_fftw(out_test, inp_test, N, inv, 1)){ fprintf(stderr, "3d FFT Verification Failed for second batch\n"); free(inp_test); free(out_test); diff --git a/tests/test_fft3d_fpga.cpp b/tests/test_fft3d_fpga.cpp index 5901a6d..63ecbc6 100644 --- a/tests/test_fft3d_fpga.cpp +++ b/tests/test_fft3d_fpga.cpp @@ -57,7 +57,7 @@ TEST(fft3dFPGATest, CorrectnessBRAM){ fft_time = fftfpgaf_c2c_3d_bram(N, inp, out, 0, 0); - int result = verify_sp_fft3d_fftw(out, inp, N, 0); + int result = verify_sp_fft3d_fftw(out, inp, N, 0, 1); EXPECT_EQ(result, 1); @@ -114,7 +114,77 @@ TEST(fftFPGATest, ValidSp3dFFTDDR){ fft_time = fftfpgaf_c2c_3d_ddr(N, inp, out, 0); - int result = verify_sp_fft3d_fftw(out, inp, N, 0); + int result = verify_sp_fft3d_fftw(out, inp, N, 0, 1); + + EXPECT_EQ(result, 1); + + free(inp); + free(out); + + fpga_final(); +#endif +} + +/** + * \brief fftfpgaf_c2c_3d_ddr_svm_batch() + */ +TEST(fft3dFPGATest, InputValidityDDRSVMBatch){ + const int N = 64; + + size_t sz = sizeof(float2) * N * N * N* 2; + float2 *test = (float2*)malloc(sz); + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + + // null inp ptr input + fft_time = fftfpgaf_c2c_3d_ddr_svm_batch(64, NULL, test, 0, 2); + EXPECT_EQ(fft_time.valid, 0); + + // null out ptr input + fft_time = fftfpgaf_c2c_3d_ddr_svm_batch(64, test, NULL, 0, 2); + EXPECT_EQ(fft_time.valid, 0); + + // if N not a power of 2 + fft_time = fftfpgaf_c2c_3d_ddr_svm_batch(63, test, test, 0, 2); + EXPECT_EQ(fft_time.valid, 0); + + // howmany is 0 + fft_time = fftfpgaf_c2c_3d_ddr_svm_batch(63, test, test, 0, 0); + EXPECT_EQ(fft_time.valid, 0); + + // howmany is negative + fft_time = fftfpgaf_c2c_3d_ddr_svm_batch(63, test, test, 0, -1); + EXPECT_EQ(fft_time.valid, 0); + + free(test); +} + + +/** + * \brief fftfpgaf_c2c_3d_ddr_svm_batch() + */ +TEST(fftFPGATest, ValidSp3dFFTDDRSVMBatch){ + // check correctness of output for a random number of batches 
+#ifdef USE_FFTW + // malloc data to input + const int N = (1 << 6); + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr_triv/fft3d_ddr_triv.aocx", 0); + ASSERT_EQ(isInit, 0); + + // Random number of batches between 1 and 10 + int how_many = (rand() % 10) + 1; + size_t sz = sizeof(float2) * N * N * N * how_many; + unsigned num_pts = how_many * N * N * N; + + float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(sz); + + fftf_create_data(inp, num_pts); + + fft_time = fftfpgaf_c2c_3d_ddr_svm_batch(N, inp, out, 0, how_many); + + int result = verify_sp_fft3d_fftw(out, inp, N, 0, how_many); EXPECT_EQ(result, 1); From 6f2fd40e8f1270e2649d781f6f04d962e539fcab Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 6 Jul 2020 17:51:59 +0200 Subject: [PATCH 12/76] working batch fft3d svm --- api/src/fftfpga.c | 158 +++++++++---------- api/src/svm.c | 1 - examples/common/helper.c | 3 +- examples/common/helper.h | 2 +- examples/common/verify_fftw.c | 16 +- examples/fft1d.c | 5 +- examples/fft2d.c | 5 +- examples/fft3d.c | 5 +- examples/{fft3d_test.c => fft3d_svm_batch.c} | 29 +--- 9 files changed, 102 insertions(+), 122 deletions(-) rename examples/{fft3d_test.c => fft3d_svm_batch.c} (79%) diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 42dda8b..bb9ecb5 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -1067,7 +1067,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i // Device memory buffers: double buffers cl_mem d_outData_0; - d_outData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + d_outData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); cl_mem d_outData_1; @@ -1089,14 +1089,11 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int 
N, float2 *inp, float2 *out, int inv, i for(size_t j = 0; j < num_pts; j++){ h_inData[i][j].x = inp[stride + j].x; h_inData[i][j].y = inp[stride + j].y; - - //printf("%lu: %lu: (%f, %f)\n", i, j, h_inData[i][j].x, h_inData[i][j].y); } status = clEnqueueSVMUnmap(queue1, (void *)h_inData[i], 0, NULL, NULL); checkError(status, "Failed to unmap input data"); } - printf("Transferred data to SVM buffers \n"); /* * kernel arguments @@ -1114,16 +1111,6 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); checkError(status, "Failed to set store1 kernel arg"); - // kernel fetches from DDR memory - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set fetch2 kernel arg"); - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - - // kernel stores using SVM based PCIe to host - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[0]); - checkError(status, "Failed to set store2 kernel arg"); - /* * First batch write phase */ @@ -1143,24 +1130,44 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second transpose kernel"); - // Wait for all command queues to complete pending events - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - - printf("First iter write phase complete \n"); for(size_t i = 1; i < how_many; i++){ /* - * write phase of current iteration - */ + * Read phase of previous 
iteration + */ + // kernel fetches from DDR memory + // kernel stores using SVM based PCIe to host + if( (i % 2) == 1){ + // if odd number of batches + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); + checkError(status, "Failed to set fetch2 kernel arg"); + + // Start fetch2 phase with same queue as store1 + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + } + else{ + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); + checkError(status, "Failed to set fetch2 kernel arg"); + + // Start fetch2 phase with same queue as store1 + status = clEnqueueTask(queue8, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + } + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[i-1]); + checkError(status, "Failed to set store2 kernel arg"); + + status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue7, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + /* + * write phase of current iteration + */ // change write phase host and ddr ptrs status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData[i]); checkError(status, "Failed to set fetch1 kernel arg"); @@ -1186,67 +1193,57 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - /* - * Read phase of previous iteration - */ - status = clEnqueueTask(queue6, fetch2_kernel, 0, NULL, NULL); - checkError(status, 
"Failed to launch fetch kernel"); - - status = clEnqueueTask(queue7, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue8, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - status = clFinish(queue6); - checkError(status, "failed to finish"); - status = clFinish(queue7); - checkError(status, "failed to finish"); - status = clFinish(queue8); - checkError(status, "failed to finish"); - - // Change read phase ptrs to current iteration - if( (i % 2) == 1){ - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); - checkError(status, "Failed to set fetch2 kernel arg"); + if(i % 2 == 1){ + status = clEnqueueTask(queue8, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); } else{ - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set fetch2 kernel arg"); + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); } - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[i]); - checkError(status, "Failed to set store2 kernel arg"); } - printf("Read phase left \n"); - status = clEnqueueTask(queue6, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + if(how_many % 2 == 1){ + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); + checkError(status, "Failed to set fetch2 kernel arg"); - status = clEnqueueTask(queue7, fftc_kernel, 0, NULL, NULL); + 
status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + } + else{ + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); + checkError(status, "Failed to set fetch2 kernel arg"); + status = clEnqueueTask(queue8, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + } + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[how_many-1]); + checkError(status, "Failed to set store2 kernel arg"); + + status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue8, store2_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue7, store2_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose kernel"); + status = clFinish(queue1); + checkError(status, "Failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "Failed to finish queue2"); + status = clFinish(queue3); + checkError(status, "Failed to finish queue1"); + status = clFinish(queue4); + checkError(status, "Failed to finish queue2"); + status = clFinish(queue5); + checkError(status, "Failed to finish queue1"); status = clFinish(queue6); - checkError(status, "failed to finish"); + checkError(status, "Failed to finish queue2"); status = clFinish(queue7); - checkError(status, "failed to finish"); + checkError(status, "Failed to finish queue1"); status = clFinish(queue8); - checkError(status, "failed to finish"); + checkError(status, "Failed to finish queue2"); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; for(size_t i = 0; i < how_many; i++){ @@ -1255,27 +1252,23 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, 
"Failed to map out data"); - size_t stride = how_many * num_pts; + size_t stride = i * num_pts; for(size_t j = 0; j < num_pts; j++){ out[stride + j].x = h_outData[i][j].x; out[stride + j].y = h_outData[i][j].y; - printf("%lu: %lu: (%f, %f)\n", i, j, out[stride+j].x, out[stride+j].y); } status = clEnqueueSVMUnmap(queue2, (void *)h_outData[i], 0, NULL, NULL); checkError(status, "Failed to unmap out data"); } - printf("written to SVM phase left \n"); for(size_t i = 0; i < how_many; i++){ clSVMFree(context, h_inData[i]); clSVMFree(context, h_outData[i]); } - printf("Freed SVM buffer \n"); queue_cleanup(); - printf("Freeing ddr buffer\n"); if (d_outData_0) clReleaseMemObject(d_outData_0); if (d_outData_1) @@ -1301,7 +1294,6 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i if(store2_kernel) clReleaseKernel(store2_kernel); - printf("Freed all kernels\n"); fft_time.valid = 1; return fft_time; } diff --git a/api/src/svm.c b/api/src/svm.c index 76f316e..3184aad 100644 --- a/api/src/svm.c +++ b/api/src/svm.c @@ -34,7 +34,6 @@ bool check_valid_svm_device(cl_device_id device){ &sz_return ); checkError(status, "Failed to get device info"); - printf("SVM capabilities: %lu, size %lu \n", caps, sz_return); if (caps && CL_DEVICE_SVM_COARSE_GRAIN_BUFFER){ return true; diff --git a/examples/common/helper.c b/examples/common/helper.c index a8cd626..500b522 100755 --- a/examples/common/helper.c +++ b/examples/common/helper.c @@ -60,7 +60,7 @@ bool fft_create_data(double2 *inp, int N){ * \param sp: 1, single precision floating point transformation * \param use_bram: 1 if transpose uses BRAM, not DDR (valid for 2d and 3d FFT) */ -void print_config(int N, int dim, int iter, int inv, int sp, int use_bram){ +void print_config(int N, int dim, int iter, int inv, int sp, int batch, int use_bram){ printf("\n------------------------------------------\n"); printf("FFT Configuration: \n"); printf("--------------------------------------------\n"); @@ -69,6 +69,7 @@ 
void print_config(int N, int dim, int iter, int inv, int sp, int use_bram){ printf("Precision = %s \n", sp==1 ? "Single": "Double"); printf("Direction = %s \n", inv ? "Backward":"Forward"); printf("Placement = In Place \n"); + printf("Batch = %d \n", batch); printf("Iterations = %d \n", iter); printf("Transpose = %s \n", use_bram ? "BRAM":"DDR"); printf("--------------------------------------------\n\n"); diff --git a/examples/common/helper.h b/examples/common/helper.h index 8c75a7e..6ec4a71 100755 --- a/examples/common/helper.h +++ b/examples/common/helper.h @@ -10,7 +10,7 @@ bool fftf_create_data(float2 *inp, int N); bool fft_create_data(double2 *inp, int N); -void print_config(int N, int dim, int iter, int inv, int sp, int use_bram); +void print_config(int N, int dim, int iter, int inv, int sp, int batch, int use_bram); void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, int N, int dim, int iter, int inv, int sp); diff --git a/examples/common/verify_fftw.c b/examples/common/verify_fftw.c index 1e268d2..a5910de 100644 --- a/examples/common/verify_fftw.c +++ b/examples/common/verify_fftw.c @@ -12,13 +12,14 @@ * \param fftw_data: pointer to fft3d sized allocation of sp complex data for fftw cpu computation * \param N: number of points per dimension of FFT3d * \param inverse: 1 if inverse + * \param how_many: batch, default is 1 * \return true if verification passed */ bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse, int how_many){ // Copy inp data to verify using FFTW // requires allocating data specifically for FFTW computation - size_t num_pts = N * N * N; + size_t num_pts = how_many * N * N * N; fftwf_complex *fftw_data = fftwf_alloc_complex(num_pts); for(size_t i = 0; i < num_pts; i++){ @@ -29,12 +30,19 @@ bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse, i // Compute 3d FFT using FFTW // Create Plan using simple heuristic and in place FFT fftwf_plan plan; + int 
rank = 3; + const int n[] = {N, N, N}; + int howmany = how_many; + int idist = N*N*N, odist = N*N*N; + int istride = 1, ostride = 1; // contiguous in memory if(inverse){ - plan = fftwf_plan_dft_3d( N, N, N, &fftw_data[0], &fftw_data[0], FFTW_BACKWARD, FFTW_ESTIMATE); + plan = fftwf_plan_many_dft(rank, n, howmany, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_BACKWARD, FFTW_ESTIMATE); + //plan = fftwf_plan_dft_3d( N, N, N, &fftw_data[0], &fftw_data[0], FFTW_BACKWARD, FFTW_ESTIMATE); } else{ - plan = fftwf_plan_dft_3d( N, N, N, &fftw_data[0], &fftw_data[0], FFTW_FORWARD, FFTW_ESTIMATE); + plan = fftwf_plan_many_dft(rank, n, howmany, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_FORWARD, FFTW_ESTIMATE); + //plan = fftwf_plan_dft_3d( N, N, N, &fftw_data[0], &fftw_data[0], FFTW_FORWARD, FFTW_ESTIMATE); } // Execute in place FFTW based on plan created @@ -54,7 +62,7 @@ bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse, i mag_sum += magnitude; noise_sum += noise; #ifndef NDEBUG - //printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); + printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); #endif } diff --git a/examples/fft1d.c b/examples/fft1d.c index 4899369..5663be9 100644 --- a/examples/fft1d.c +++ b/examples/fft1d.c @@ -17,7 +17,7 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 1, iter = 1, inv = 0, sp = 0, use_bram; + int N = 64, dim = 1, iter = 1, inv = 0, sp = 0, batch = 1, use_bram; char *path = "fft1d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; fpga_t timing = {0.0, 0.0, 0.0, 0}; @@ -32,6 +32,7 @@ int main(int argc, const char **argv) { OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), + 
OPT_INTEGER('c',"batch", &batch, "Batch"), OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_STRING('p', "path", &path, "Path to bitstream"), OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), @@ -44,7 +45,7 @@ int main(int argc, const char **argv) { argc = argparse_parse(&argparse, argc, argv); // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, use_bram); + print_config(N, dim, iter, inv, sp, batch, use_bram); if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; diff --git a/examples/fft2d.c b/examples/fft2d.c index 3505cba..bc789a5 100644 --- a/examples/fft2d.c +++ b/examples/fft2d.c @@ -18,7 +18,7 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 2, iter = 1, inv = 0, sp = 0, use_bram = 0, interleaving = 0; + int N = 64, dim = 2, iter = 1, inv = 0, sp = 0, use_bram = 0, batch = 1, interleaving = 0; char *path = "fft2d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; fpga_t timing = {0.0, 0.0, 0.0, 0}; @@ -35,6 +35,7 @@ int main(int argc, const char **argv) { OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), + OPT_INTEGER('c',"batch", &batch, "Batch"), OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), @@ -48,7 +49,7 @@ int main(int argc, const char **argv) { argc = argparse_parse(&argparse, argc, argv); // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, use_bram); + print_config(N, dim, iter, inv, sp, batch, use_bram); if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; diff --git a/examples/fft3d.c b/examples/fft3d.c index db27261..96ab6fe 100755 --- a/examples/fft3d.c +++ b/examples/fft3d.c @@ 
-18,7 +18,7 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, interleaving = 0; + int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, batch = 1,interleaving = 0; char *path = "fft3d_emulate.aocx"; const char *platform; fpga_t timing = {0.0, 0.0, 0.0, 0}; @@ -35,6 +35,7 @@ int main(int argc, const char **argv) { OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), + OPT_INTEGER('c',"batch", &batch, "Batch"), OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), @@ -48,7 +49,7 @@ int main(int argc, const char **argv) { argc = argparse_parse(&argparse, argc, argv); // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, use_bram); + print_config(N, dim, iter, inv, sp, batch, use_bram); if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; diff --git a/examples/fft3d_test.c b/examples/fft3d_svm_batch.c similarity index 79% rename from examples/fft3d_test.c rename to examples/fft3d_svm_batch.c index 7aa712b..a622ca8 100755 --- a/examples/fft3d_test.c +++ b/examples/fft3d_svm_batch.c @@ -35,7 +35,7 @@ int main(int argc, const char **argv) { OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), - OPT_BOOLEAN('c',"batch", &batch, "Batch"), + OPT_INTEGER('c',"batch", &batch, "Batch"), OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), @@ -49,7 +49,7 @@ int main(int argc, const char **argv) { argc = argparse_parse(&argparse, argc, argv); 
// Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, use_bram); + print_config(N, dim, iter, inv, sp, batch, use_bram); if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; @@ -92,35 +92,12 @@ int main(int argc, const char **argv) { total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW - printf("FFTW Validation\n"); - if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ + if(!verify_sp_fft3d_fftw(out, inp, N, inv, batch)){ fprintf(stderr, "3d FFT Verification Failed \n"); free(inp); free(out); return EXIT_FAILURE; } - - size_t inp_test_sz = sizeof(float2) * N * N * N; - float2 *inp_test = (float2*)fftfpgaf_complex_malloc(inp_test_sz); - float2 *out_test = (float2*)fftfpgaf_complex_malloc(inp_test_sz); - - size_t str = N * N * N; - for(size_t i = 0; i < (N*N*N); i++){ - inp_test[i].x = inp[str + i].x; - inp_test[i].y = inp[str + i].y; - - out_test[i].x = out[str + i].x; - out_test[i].y = out[str + i].y; - } - - if(!verify_sp_fft3d_fftw(out_test, inp_test, N, inv, 1)){ - fprintf(stderr, "3d FFT Verification Failed for second batch\n"); - free(inp_test); - free(out_test); - return EXIT_FAILURE; - } - free(inp_test); - free(out_test); #endif if(timing.valid == 0){ fprintf(stderr, "Invalid execution, timing found to be 0"); From 7368cdc7b8a913c0e118196b254b2eabf0225789 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 6 Jul 2020 18:44:49 +0200 Subject: [PATCH 13/76] cmake fix for ci --- examples/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 1dc068e..899e5a8 100755 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,7 +4,7 @@ project(examplesfftfpga VERSION 0.1 DESCRIPTION "Example Code that uses libfftfpga" LANGUAGES C CXX) -set(examples fft3d fft2d fft1d fft3d_test) +set(examples fft3d fft2d fft1d fft3d_svm_batch) # create a target for each of the example 
foreach(example ${examples}) From 9731719237f84151f30e54e0848040d0f9094c90 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 4 Aug 2020 22:19:50 +0200 Subject: [PATCH 14/76] fft2d with opt matrix transpose --- kernels/CMakeLists.txt | 1 + kernels/common/fft_config.h.in | 4 +- kernels/fft1d/fft1d.cl | 4 - kernels/fft2d/CMakeLists.txt | 2 +- kernels/fft2d/fft2d_bram.cl | 7 - kernels/fft2d/fft2d_bram_opt.cl | 370 +++++++++++++++++++++ kernels/fft2d/fft2d_ddr.cl | 3 - kernels/fft3d/fft3d_bram.cl | 11 +- kernels/fft3d/fft3d_ddr_triv.cl | 9 - kernels/matrixTranspose/diagonal_bitrev.cl | 168 ++++++++++ 10 files changed, 546 insertions(+), 33 deletions(-) create mode 100644 kernels/fft2d/fft2d_bram_opt.cl create mode 100644 kernels/matrixTranspose/diagonal_bitrev.cl diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index 5fcfb31..fd14810 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -14,6 +14,7 @@ set(LOG_FFT_SIZE 6 CACHE STRING "Log of points of FFT") set_property(CACHE LOG_FFT_SIZE PROPERTY STRINGS 4 5 6 7 8 9) math(EXPR FFT_SIZE "1 << ${LOG_FFT_SIZE}") message("-- FFT size is ${FFT_SIZE}") +math(EXPR DEPTH "1 << (${LOG_FFT_SIZE} + ${LOG_FFT_SIZE} - ${LOG_POINTS})") #set(BUF_LOC "DDR") set(BUFFER_LOCATION "DDR" CACHE STRING "Buffer location of 3d Transpose") diff --git a/kernels/common/fft_config.h.in b/kernels/common/fft_config.h.in index 1602c0d..ae30dc5 100755 --- a/kernels/common/fft_config.h.in +++ b/kernels/common/fft_config.h.in @@ -7,7 +7,9 @@ #define POINTS @POINTS@ #define LOGN @LOG_FFT_SIZE@ -#define FFT_SIZE @FFT_SIZE@ +#define N @FFT_SIZE@ + +#define DEPTH @DEPTH@ #define BUFFER_LOCATION "@BUFFER_LOCATION@" diff --git a/kernels/fft1d/fft1d.cl b/kernels/fft1d/fft1d.cl index 662475f..e55ee1f 100644 --- a/kernels/fft1d/fft1d.cl +++ b/kernels/fft1d/fft1d.cl @@ -140,7 +140,6 @@ uint permute_gid (uint gid) { __attribute__((reqd_work_group_size(CONT_FACTOR * POINTS, 1, 1))) kernel void fetch(global float2 * restrict src) { - 
const int N = (1 << LOGN); // Each thread will fetch POINTS points. Need POINTS times to pass to FFT. const int BUF_SIZE = 1 << (LOG_CONT_FACTOR + LOGPOINTS + LOGPOINTS); @@ -180,12 +179,9 @@ kernel void fetch(global float2 * restrict src) { * 'inverse' toggles between the direct and the inverse transform */ -__attribute((task)) kernel void fft1d(global float2 * restrict dest, int count, int inverse) { - const int N = (1 << LOGN); - /* The FFT engine requires a sliding window array for data reordering; data * stored in this array is carried across loop iterations and shifted by one * element every iteration; all loop dependencies derived from the uses of diff --git a/kernels/fft2d/CMakeLists.txt b/kernels/fft2d/CMakeLists.txt index 793ccac..2f893d5 100644 --- a/kernels/fft2d/CMakeLists.txt +++ b/kernels/fft2d/CMakeLists.txt @@ -9,7 +9,7 @@ cmake_minimum_required(VERSION 3.10) # - ${kernel_name}_syn: to generate synthesis binary ## set(CL_PATH "${fftkernelsfpga_SOURCE_DIR}/fft2d") -set(kernels fft2d_bram fft2d_ddr) +set(kernels fft2d_bram fft2d_ddr fft2d_bram_opt) include(${fftkernelsfpga_SOURCE_DIR}/cmake/genKernelTargets.cmake) diff --git a/kernels/fft2d/fft2d_bram.cl b/kernels/fft2d/fft2d_bram.cl index 32b069c..72f3b6c 100644 --- a/kernels/fft2d/fft2d_bram.cl +++ b/kernels/fft2d/fft2d_bram.cl @@ -25,7 +25,6 @@ int bit_reversed(int x, int bits) { // Kernel that fetches data from global memory kernel void fetch(global volatile float2 * restrict src) { - const unsigned N = (1 << LOGN); for(unsigned k = 0; k < N; k++){ float2 buf[N]; @@ -53,7 +52,6 @@ kernel void fetch(global volatile float2 * restrict src) { */ kernel void fft2da(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -102,8 +100,6 @@ kernel void fft2da(int inverse) { // Transposes fetched data; stores them to global memory kernel void transpose(){ - - const 
unsigned N = (1 << LOGN); unsigned revcolt, where, where_write; local float2 buf[N * N]; @@ -139,8 +135,6 @@ kernel void transpose(){ } kernel void fft2db(int inverse) { - const int N = (1 << LOGN); - /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element * every iteration; all loop dependencies derived from the uses of this @@ -185,7 +179,6 @@ kernel void fft2db(int inverse) { } kernel void store(global volatile float2 * restrict dest){ - const unsigned N = (1 << LOGN); unsigned revcolt, where; local float2 buf[N * N]; diff --git a/kernels/fft2d/fft2d_bram_opt.cl b/kernels/fft2d/fft2d_bram_opt.cl new file mode 100644 index 0000000..a12781e --- /dev/null +++ b/kernels/fft2d/fft2d_bram_opt.cl @@ -0,0 +1,370 @@ +// Author: Arjun Ramaswami + +#include "fft_config.h" +#include "fft_8.cl" +#include "../matrixTranspose/diagonal_bitrev.cl" + +#pragma OPENCL EXTENSION cl_intel_channels : enable +channel float2 chaninfft2da[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft2db[POINTS] __attribute__((depth(POINTS))); + +channel float2 chaninTranspose1[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranspose2[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninStore[POINTS] __attribute__((depth(POINTS))); + +// Kernel that fetches data from global memory +kernel void fetchBitrev1(global volatile float2 * restrict src) { + + for(unsigned k = 0; k < N; k++){ + float2 buf[N]; + + #pragma unroll 8 + for(unsigned i = 0; i < N; i++){ + buf[i & ((1<= N / POINTS - 1) { + write_channel_intel(chaninTranspose1[0], data.i0); + write_channel_intel(chaninTranspose1[1], data.i1); + write_channel_intel(chaninTranspose1[2], data.i2); + write_channel_intel(chaninTranspose1[3], data.i3); + write_channel_intel(chaninTranspose1[4], data.i4); + write_channel_intel(chaninTranspose1[5], data.i5); + write_channel_intel(chaninTranspose1[6], data.i6); + 
write_channel_intel(chaninTranspose1[7], data.i7); + } + } +} + +__attribute__((max_global_work_dim(0))) +kernel void transpose1() { + const unsigned DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 bitrev_in[2][N], bitrev_out[2][N] ; + //float2 bitrev_in[2][N] __attribute__((memory("MLAB"))); + + int initial_delay = DELAY + DELAY; // for each of the bitrev buffer + + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((DEPTH) + DEPTH); step++){ + float2x8 data, data_out; + if (step < ((DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose1[0]); + data.i1 = read_channel_intel(chaninTranspose1[1]); + data.i2 = read_channel_intel(chaninTranspose1[2]); + data.i3 = read_channel_intel(chaninTranspose1[3]); + data.i4 = read_channel_intel(chaninTranspose1[4]); + data.i5 = read_channel_intel(chaninTranspose1[5]); + data.i6 = read_channel_intel(chaninTranspose1[6]); + data.i7 = read_channel_intel(chaninTranspose1[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, DELAY); + + data_out = readBuf( + is_bufA ? buf[1] : buf[0], + step); + + unsigned start_row = (step + DELAY) & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? 
bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + if (step >= (DEPTH)) { + write_channel_intel(chaninfft2db[0], data_out.i0); + write_channel_intel(chaninfft2db[1], data_out.i1); + write_channel_intel(chaninfft2db[2], data_out.i2); + write_channel_intel(chaninfft2db[3], data_out.i3); + write_channel_intel(chaninfft2db[4], data_out.i4); + write_channel_intel(chaninfft2db[5], data_out.i5); + write_channel_intel(chaninfft2db[6], data_out.i6); + write_channel_intel(chaninfft2db[7], data_out.i7); + } + } +} + +kernel void fft2db(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + // Read data from channels + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft2db[0]); + data.i1 = read_channel_intel(chaninfft2db[1]); + data.i2 = read_channel_intel(chaninfft2db[2]); + data.i3 = read_channel_intel(chaninfft2db[3]); + data.i4 = read_channel_intel(chaninfft2db[4]); + data.i5 = read_channel_intel(chaninfft2db[5]); + data.i6 = read_channel_intel(chaninfft2db[6]); + data.i7 = read_channel_intel(chaninfft2db[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose2[0], data.i0); + write_channel_intel(chaninTranspose2[1], data.i1); + write_channel_intel(chaninTranspose2[2], data.i2); + write_channel_intel(chaninTranspose2[3], data.i3); + write_channel_intel(chaninTranspose2[4], data.i4); + 
write_channel_intel(chaninTranspose2[5], data.i5); + write_channel_intel(chaninTranspose2[6], data.i6); + write_channel_intel(chaninTranspose2[7], data.i7); + } + } +} + +__attribute__((max_global_work_dim(0))) +kernel void transpose2() { + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((DEPTH) + DEPTH); step++){ + float2x8 data, data_out; + if (step < ((DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose2[0]); + data.i1 = read_channel_intel(chaninTranspose2[1]); + data.i2 = read_channel_intel(chaninTranspose2[2]); + data.i3 = read_channel_intel(chaninTranspose2[3]); + data.i4 = read_channel_intel(chaninTranspose2[4]); + data.i5 = read_channel_intel(chaninTranspose2[5]); + data.i6 = read_channel_intel(chaninTranspose2[6]); + data.i7 = read_channel_intel(chaninTranspose2[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? 
buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + write_channel_intel(chaninStore[0], data_out.i0); + write_channel_intel(chaninStore[1], data_out.i1); + write_channel_intel(chaninStore[2], data_out.i2); + write_channel_intel(chaninStore[3], data_out.i3); + write_channel_intel(chaninStore[4], data_out.i4); + write_channel_intel(chaninStore[5], data_out.i5); + write_channel_intel(chaninStore[6], data_out.i6); + write_channel_intel(chaninStore[7], data_out.i7); + } + } +} + +kernel void store(global volatile float2 * restrict dest){ + + // perform N*N writes to buffer + for(unsigned i = 0; i < N; i++){ + for(unsigned j = 0; j < (N / 8); j++){ + unsigned where = ((i << LOGN) + (j << LOGPOINTS)); + + #pragma unroll 8 + for(unsigned u = 0; u < 8; u++){ + dest[where + u] = read_channel_intel(chaninStore[u]); + } + } + } +} +/* +kernel void store(global volatile float2 * restrict dest){ + unsigned revcolt, where; + + local float2 buf[N * N]; + + // perform N*N writes to buffer + for(unsigned i = 0; i < N; i++){ + for(unsigned j = 0; j < (N / 8); j++){ + where = ((i << LOGN) + (j << LOGPOINTS)); + + #pragma unroll 8 + for(unsigned u = 0; u < 8; u++){ + buf[where + u] = read_channel_intel(chaninStore[u]); + } + } + } + + for(unsigned i = 0; i < N; i++){ + revcolt = bit_reversed(i, LOGN); + where = (i << LOGN); + + #pragma unroll 8 + for( unsigned u = 0; u < N; u++){ + dest[where + u] = buf[(u << LOGN) + revcolt]; + } + } +} +*/ +/* +kernel void store(global volatile float2 * restrict dest){ + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 bitrev_in[2][N], bitrev_out[2][N] ; + //float2 bitrev_in[2][N] __attribute__((memory("MLAB"))); + + int initial_delay = DELAY + DELAY; // for each of the bitrev buffer + + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((DEPTH) - 
initial_delay)) { + data.i0 = read_channel_intel(chaninStore[0]); + data.i1 = read_channel_intel(chaninStore[1]); + data.i2 = read_channel_intel(chaninStore[2]); + data.i3 = read_channel_intel(chaninStore[3]); + data.i4 = read_channel_intel(chaninStore[4]); + data.i5 = read_channel_intel(chaninStore[5]); + data.i6 = read_channel_intel(chaninStore[6]); + data.i7 = read_channel_intel(chaninStore[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step); + + data_out = readBuf( + is_bufA ? 
buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned index = (step - DEPTH) * 8; + printf("Store index - %d step : %d \n", index, step); + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; + } + } + printf("Store Completed\n"); +} +*/ \ No newline at end of file diff --git a/kernels/fft2d/fft2d_ddr.cl b/kernels/fft2d/fft2d_ddr.cl index fc13557..e29d0c7 100644 --- a/kernels/fft2d/fft2d_ddr.cl +++ b/kernels/fft2d/fft2d_ddr.cl @@ -138,7 +138,6 @@ int mangle_bits(int x) { __attribute__((reqd_work_group_size((1 << LOGN), 1, 1))) kernel void fetch(global float2 * restrict src, int mangle) { - const int N = (1 << LOGN); // Local memory for storing 8 rows local float2 buf[8 * N]; @@ -203,7 +202,6 @@ kernel void fetch(global float2 * restrict src, int mangle) { */ kernel void fft2d(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -259,7 +257,6 @@ kernel void fft2d(int inverse) { __attribute__((reqd_work_group_size((1 << LOGN), 1, 1))) kernel void transpose(global float2 * restrict dest, int mangle) { - const int N = (1 << LOGN); local float2 buf[POINTS * N]; buf[8 * get_local_id(0)] = read_channel_intel(chan0); buf[8 * get_local_id(0) + 1] = read_channel_intel(chan1); diff --git a/kernels/fft3d/fft3d_bram.cl b/kernels/fft3d/fft3d_bram.cl index 0180c89..d103403 100755 --- a/kernels/fft3d/fft3d_bram.cl +++ b/kernels/fft3d/fft3d_bram.cl @@ -24,7 +24,7 @@ int bit_reversed(int x, int bits) { return y; } -void sendTofft(float2 *buffer, const unsigned N, unsigned j){ +void sendTofft(float2 *buffer, unsigned j){ write_channel_intel(chaninfft[0], buffer[j]); // 0 write_channel_intel(chaninfft[1], buffer[4 * N / 8 + 
j]); // 32 write_channel_intel(chaninfft[2], buffer[2 * N / 8 + j]); // 16 @@ -37,7 +37,6 @@ void sendTofft(float2 *buffer, const unsigned N, unsigned j){ // Kernel that fetches data from global memory kernel void fetch(global volatile float2 * restrict src) { - const unsigned N = (1 << LOGN); for(unsigned k = 0; k < (1 << (LOGN + LOGN)); k++){ @@ -48,7 +47,7 @@ kernel void fetch(global volatile float2 * restrict src) { } for(unsigned j = 0; j < (N / 8); j++){ - sendTofft(&buf[0], N, j); + sendTofft(&buf[0], j); } } @@ -68,7 +67,7 @@ kernel void fetch(global volatile float2 * restrict src) { } for(unsigned j = 0; j < (N / 8); j++){ - sendTofft(&buf[0], N, j); + sendTofft(&buf[0], j); } } } @@ -78,7 +77,6 @@ kernel void fetch(global volatile float2 * restrict src) { */ kernel void fft3da(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -130,7 +128,6 @@ kernel void fft3da(int inverse) { // Transposes fetched data; stores them to global memory kernel void transpose(global float2 * restrict dest) { - const unsigned N = (1 << LOGN); unsigned revcolt, where_read, where_write, where; local float2 buf[N * N]; @@ -203,7 +200,6 @@ kernel void transpose(global float2 * restrict dest) { } kernel void fft3db(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -253,7 +249,6 @@ kernel void fft3db(int inverse) { // Stores data for 3rd dim FFT kernel void transpose3d(){ - const unsigned N = (1 << LOGN); unsigned revcolt, where; unsigned where_test; diff --git a/kernels/fft3d/fft3d_ddr_triv.cl b/kernels/fft3d/fft3d_ddr_triv.cl index c90ab83..6355285 100755 --- a/kernels/fft3d/fft3d_ddr_triv.cl +++ b/kernels/fft3d/fft3d_ddr_triv.cl @@ -28,7 +28,6 @@ int bit_reversed(int x, int bits) { // 
Kernel that fetches data from global memory kernel void fetch1(global volatile float2 * restrict src1) { - const unsigned N = (1 << LOGN); for(unsigned k = 0; k < (N * N); k++){ float2 buf[N]; @@ -56,7 +55,6 @@ kernel void fetch1(global volatile float2 * restrict src1) { */ kernel void fft3da(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -108,7 +106,6 @@ kernel void fft3da(int inverse) { // Transposes fetched data; stores them to global memory kernel void transpose(){ - const unsigned N = (1 << LOGN); unsigned revcolt, where, where_write; local float2 buf[N * N]; @@ -151,7 +148,6 @@ kernel void transpose(){ } kernel void fft3db(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -204,7 +200,6 @@ kernel void fft3db(int inverse) { __kernel void store1(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict dest1){ - const unsigned N = (1 << LOGN); local float2 buf[N * N]; for(unsigned zdim = 0; zdim < N; zdim++){ @@ -243,7 +238,6 @@ void store1(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile __kernel void fetch2(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict src2){ - const unsigned N = (1 << LOGN); local float2 buf[N * N]; for(unsigned ydim = 0; ydim < N; ydim++){ @@ -284,7 +278,6 @@ void fetch2(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile * Input and output data in bit-reversed format */ kernel void fft3dc(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -337,8 +330,6 @@ kernel void fft3dc(int inverse) { */ kernel 
void store2(global float2 * restrict dest2){ - const unsigned N = (1 << LOGN); - local float2 buf[N * N]; for(unsigned ydim = 0; ydim < N; ydim++){ diff --git a/kernels/matrixTranspose/diagonal_bitrev.cl b/kernels/matrixTranspose/diagonal_bitrev.cl new file mode 100644 index 0000000..1bea1d8 --- /dev/null +++ b/kernels/matrixTranspose/diagonal_bitrev.cl @@ -0,0 +1,168 @@ +// Authors: Tobias Kenter, Arjun Ramaswami + +float2x8 bitreverse_out(float2 bitrev_outA[N], float2 bitrev_outB[N], float2x8 data, unsigned row){ + float2 rotate_in[POINTS]; + + rotate_in[0] = data.i0; + rotate_in[1] = data.i1; + rotate_in[2] = data.i2; + rotate_in[3] = data.i3; + rotate_in[4] = data.i4; + rotate_in[5] = data.i5; + rotate_in[6] = data.i6; + rotate_in[7] = data.i7; + + const unsigned STEPS = (1 << (LOGN - LOGPOINTS)); + + unsigned index = (row & (STEPS - 1)) * 8; + unsigned rot = (row >> (LOGN - LOGPOINTS)) & (POINTS - 1); + + bitrev_outA[index] = rotate_in[(0 + rot) & (POINTS - 1)]; + bitrev_outA[index + 1] = rotate_in[(1 + rot) & (POINTS - 1)]; + bitrev_outA[index + 2] = rotate_in[(2 + rot) & (POINTS - 1)]; + bitrev_outA[index + 3] = rotate_in[(3 + rot) & (POINTS - 1)]; + bitrev_outA[index + 4] = rotate_in[(4 + rot) & (POINTS - 1)]; + bitrev_outA[index + 5] = rotate_in[(5 + rot) & (POINTS - 1)]; + bitrev_outA[index + 6] = rotate_in[(6 + rot) & (POINTS - 1)]; + bitrev_outA[index + 7] = rotate_in[(7 + rot) & (POINTS - 1)]; + + unsigned index_out = (row & (STEPS - 1)); + float2x8 rotate_out; + rotate_out.i0 = bitrev_outB[index_out]; + rotate_out.i1 = bitrev_outB[(4 * N / 8) + index_out]; + rotate_out.i2 = bitrev_outB[(2 * N / 8) + index_out]; + rotate_out.i3 = bitrev_outB[(6 * N / 8) + index_out]; + rotate_out.i4 = bitrev_outB[(N / 8) + index_out]; + rotate_out.i5 = bitrev_outB[(5 * N / 8) + index_out]; + rotate_out.i6 = bitrev_outB[(3 * N / 8) + index_out]; + rotate_out.i7 = bitrev_outB[(7 * N / 8) + index_out]; + + return rotate_out; +} + +float2x8 readBuf(float2 
buf[DEPTH][POINTS], unsigned step){ + const unsigned DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + + unsigned rows = (step + DELAY); + unsigned base = (rows & (N / POINTS - 1)) << LOGN; // 0, N, 2N, ... + unsigned offset = (rows >> LOGN) & ((N / 8) - 1); // 0, .. N / POINTS + + float2 rotate_out[POINTS]; + float2x8 data; + + #pragma unroll POINTS + for(unsigned i = 0; i < POINTS; i++){ + unsigned rot = ((POINTS + i - (rows >> (LOGN - LOGPOINTS))) << (LOGN - LOGPOINTS)) & (N - 1); + unsigned row_rotate = (base + offset + rot); + rotate_out[i] = buf[row_rotate][i]; + } + + data.i0 = rotate_out[0]; + data.i1 = rotate_out[1]; + data.i2 = rotate_out[2]; + data.i3 = rotate_out[3]; + data.i4 = rotate_out[4]; + data.i5 = rotate_out[5]; + data.i6 = rotate_out[6]; + data.i7 = rotate_out[7]; + + return data; +} + +unsigned bit_reversed(unsigned x, unsigned bits) { + unsigned y = 0; + #pragma unroll + for (unsigned i = 0; i < bits; i++) { + y <<= 1; + y |= x & 1; + x >>= 1; + } + return y; +} + +float2x8 bitreverse_in(float2x8 rotate_in, float2 bitrev_inA[N], float2 bitrev_inB[N], unsigned row){ + + const unsigned STEPS = (N / 8); + unsigned index = row & (STEPS - 1); // [0, N/8 - 1] + unsigned index_in = index * 8; + + bitrev_inA[index_in + 0] = rotate_in.i0; // 0 + bitrev_inA[index_in + 1] = rotate_in.i1; // 32 + bitrev_inA[index_in + 2] = rotate_in.i2; // 16 + bitrev_inA[index_in + 3] = rotate_in.i3; // 48 + bitrev_inA[index_in + 4] = rotate_in.i4; // 8 + bitrev_inA[index_in + 5] = rotate_in.i5; // 40 + bitrev_inA[index_in + 6] = rotate_in.i6; // 24 + bitrev_inA[index_in + 7] = rotate_in.i7; // 5 + + float2x8 rotate_out; + unsigned index_out = index * 8; + unsigned index0 = bit_reversed(index_out + 0, LOGN); + unsigned index1 = bit_reversed(index_out + 1, LOGN); + unsigned index2 = bit_reversed(index_out + 2, LOGN); + unsigned index3 = bit_reversed(index_out + 3, LOGN); + unsigned index4 = bit_reversed(index_out + 4, LOGN); + unsigned index5 = bit_reversed(index_out + 5, 
LOGN); + unsigned index6 = bit_reversed(index_out + 6, LOGN); + unsigned index7 = bit_reversed(index_out + 7, LOGN); + + rotate_out.i0 = bitrev_inB[index0]; + rotate_out.i1 = bitrev_inB[index1]; + rotate_out.i2 = bitrev_inB[index2]; + rotate_out.i3 = bitrev_inB[index3]; + rotate_out.i4 = bitrev_inB[index4]; + rotate_out.i5 = bitrev_inB[index5]; + rotate_out.i6 = bitrev_inB[index6]; + rotate_out.i7 = bitrev_inB[index7]; + + return rotate_out; +} + +void writeBuf(float2x8 data, float2 buf[DEPTH][POINTS], int step, unsigned delay){ + + float2 rot_bitrev_in[POINTS]; + + rot_bitrev_in[0] = data.i0; + rot_bitrev_in[1] = data.i1; + rot_bitrev_in[2] = data.i2; + rot_bitrev_in[3] = data.i3; + rot_bitrev_in[4] = data.i4; + rot_bitrev_in[5] = data.i5; + rot_bitrev_in[6] = data.i6; + rot_bitrev_in[7] = data.i7; + + unsigned rot = ((step + delay) >> (LOGN - LOGPOINTS)) & (POINTS - 1); + unsigned row_in = (step + delay) & (DEPTH - 1); + + #pragma unroll POINTS + for(unsigned i = 0; i < POINTS; i++){ + buf[row_in][i] = rot_bitrev_in[((i + POINTS) - rot) & (POINTS -1)]; + } +} + +float2x8 readBuf_store(float2 buf[DEPTH][POINTS], unsigned step){ + unsigned base = (step & (N / POINTS - 1)) << LOGN; // 0, N, 2N, ... + unsigned offset = (step >> LOGN) & ((N / 8) - 1); // 0, .. 
N / POINTS + + float2 rotate_out[POINTS]; + float2x8 data; + + #pragma unroll POINTS + for(unsigned i = 0; i < POINTS; i++){ + unsigned rot = ((POINTS + i - (step >> (LOGN - LOGPOINTS))) << (LOGN - LOGPOINTS)) & (N - 1); + unsigned row_rotate = (base + offset + rot); + rotate_out[i] = buf[row_rotate][i]; + } + + unsigned rot_out = (step >> (LOGN - LOGPOINTS)) & (POINTS - 1); + data.i0 = rotate_out[(0 + rot_out) & (POINTS - 1)]; + data.i1 = rotate_out[(1 + rot_out) & (POINTS - 1)]; + data.i2 = rotate_out[(2 + rot_out) & (POINTS - 1)]; + data.i3 = rotate_out[(3 + rot_out) & (POINTS - 1)]; + data.i4 = rotate_out[(4 + rot_out) & (POINTS - 1)]; + data.i5 = rotate_out[(5 + rot_out) & (POINTS - 1)]; + data.i6 = rotate_out[(6 + rot_out) & (POINTS - 1)]; + data.i7 = rotate_out[(7 + rot_out) & (POINTS - 1)]; + + return data; +} \ No newline at end of file From 106182f2cca2a23fc8112bd5aaa933f6794a1baa Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 5 Aug 2020 17:48:47 +0200 Subject: [PATCH 15/76] working 3D FFT with bitrev diagonal transpose --- api/src/fftfpga.c | 42 +- examples/common/verify_fftw.c | 2 +- kernels/fft3d/fft3d_ddr.cl | 704 +++++++++++++++------------------- 3 files changed, 327 insertions(+), 421 deletions(-) diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index bb9ecb5..9602923 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -509,7 +509,8 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv){ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; - cl_kernel fetch_kernel = NULL, transpose_kernel = NULL, store_kernel = NULL; + cl_kernel fetch_kernel = NULL, store_kernel = NULL; + cl_kernel transpose_kernel1 = NULL, transpose_kernel2 = NULL; cl_int status = 0; int num_pts = N * N; @@ -557,15 +558,18 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl int inverse_int = 
inv; ffta_kernel = clCreateKernel(program, "fft2da", &status); - checkError(status, "Failed to create fft3da kernel"); + checkError(status, "Failed to create fft2da kernel"); fftb_kernel = clCreateKernel(program, "fft2db", &status); - checkError(status, "Failed to create fft3db kernel"); + checkError(status, "Failed to create fft2db kernel"); - fetch_kernel = clCreateKernel(program, "fetch", &status); + fetch_kernel = clCreateKernel(program, "fetchBitrev1", &status); checkError(status, "Failed to create fetch kernel"); - transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose kernel"); + transpose_kernel1 = clCreateKernel(program, "transpose1", &status); + checkError(status, "Failed to create transpose1 kernel"); + + transpose_kernel2 = clCreateKernel(program, "transpose2", &status); + checkError(status, "Failed to create transpose2 kernel"); store_kernel = clCreateKernel(program, "store", &status); checkError(status, "Failed to create store kernel"); @@ -586,13 +590,16 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue3, transpose_kernel1, 0, NULL, NULL); + checkError(status, "Failed to launch transpose1 kernel"); status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, store_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue5, transpose_kernel2, 0, NULL, NULL); + checkError(status, "Failed to launch transpose2 kernel"); + + status = clEnqueueTask(queue6, store_kernel, 0, NULL, NULL); checkError(status, "Failed to launch store kernel"); // Wait for all command queues to complete pending events @@ -606,6 +613,8 @@ 
fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl checkError(status, "failed to finish"); status = clFinish(queue5); checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; // Copy results from device to host @@ -629,8 +638,11 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl if(fftb_kernel) clReleaseKernel(fftb_kernel); - if(transpose_kernel) - clReleaseKernel(transpose_kernel); + if(transpose_kernel1) + clReleaseKernel(transpose_kernel1); + + if(transpose_kernel2) + clReleaseKernel(transpose_kernel2); if(store_kernel) clReleaseKernel(store_kernel); @@ -818,7 +830,7 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { int inverse_int = inv; // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetch1", &status); + cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); checkError(status, "Failed to create fetch1 kernel"); cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); checkError(status, "Failed to create fft3da kernel"); @@ -826,14 +838,14 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { checkError(status, "Failed to create transpose kernel"); cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "store1", &status); + cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); checkError(status, "Failed to create store1 kernel"); - cl_kernel fetch2_kernel = clCreateKernel(program, "fetch2", &status); + cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); checkError(status, "Failed to create fetch2 kernel"); cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); checkError(status, "Failed to create fft3dc kernel"); - 
cl_kernel store2_kernel = clCreateKernel(program, "store2", &status); + cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); checkError(status, "Failed to create store2 kernel"); // Setup Queues to the kernels diff --git a/examples/common/verify_fftw.c b/examples/common/verify_fftw.c index a5910de..bd57afe 100644 --- a/examples/common/verify_fftw.c +++ b/examples/common/verify_fftw.c @@ -134,7 +134,7 @@ bool verify_sp_fft2d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse){ mag_sum += magnitude; noise_sum += noise; #ifndef NDEBUG - //printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); + printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); #endif } diff --git a/kernels/fft3d/fft3d_ddr.cl b/kernels/fft3d/fft3d_ddr.cl index 320199b..c6889e1 100755 --- a/kernels/fft3d/fft3d_ddr.cl +++ b/kernels/fft3d/fft3d_ddr.cl @@ -1,34 +1,21 @@ -// Author: Arjun Ramaswami +// Author: Arjun Ramaswami -#include "fft_8.cl" - -// Source the log(size) (log(1k) = 10) from a header shared with the host code #include "fft_config.h" +#include "fft_8.cl" +#include "../matrixTranspose/diagonal_bitrev.cl" #pragma OPENCL EXTENSION cl_intel_channels : enable -channel float2 chaninfft1[8] __attribute__((depth(8))); -channel float2 chanoutfft1[8] __attribute__((depth(8))); - -channel float2 chaninfft2[8] __attribute__((depth(8))); -channel float2 chanoutfft2[8] __attribute__((depth(8))); - -channel float2 chaninfft3[8] __attribute__((depth(8))); -channel float2 chanoutfft3[8] __attribute__((depth(8))); - -int bit_reversed(int x, int bits) { - int y = 0; - #pragma unroll - for (int i = 0; i < bits; i++) { - y <<= 1; - y |= x & 1; - x >>= 1; - } - return y; -} + +channel float2 chaninfft3da[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft3db[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft3dc[POINTS] 
__attribute__((depth(POINTS))); + +channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranStore1[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranStore2[POINTS] __attribute__((depth(POINTS))); // Kernel that fetches data from global memory -kernel void fetch1(global volatile float2 * restrict src) { - const unsigned N = (1 << LOGN); +kernel void fetchBitrev1(global volatile float2 * restrict src) { for(unsigned k = 0; k < (N * N); k++){ float2 buf[N]; @@ -39,14 +26,14 @@ kernel void fetch1(global volatile float2 * restrict src) { } for(unsigned j = 0; j < (N / 8); j++){ - write_channel_intel(chaninfft1[0], buf[j]); // 0 - write_channel_intel(chaninfft1[1], buf[4 * N / 8 + j]); // 32 - write_channel_intel(chaninfft1[2], buf[2 * N / 8 + j]); // 16 - write_channel_intel(chaninfft1[3], buf[6 * N / 8 + j]); // 48 - write_channel_intel(chaninfft1[4], buf[N / 8 + j]); // 8 - write_channel_intel(chaninfft1[5], buf[5 * N / 8 + j]); // 40 - write_channel_intel(chaninfft1[6], buf[3 * N / 8 + j]); // 24 - write_channel_intel(chaninfft1[7], buf[7 * N / 8 + j]); // 54 + write_channel_intel(chaninfft3da[0], buf[j]); // 0 + write_channel_intel(chaninfft3da[1], buf[4 * N / 8 + j]); // 32 + write_channel_intel(chaninfft3da[2], buf[2 * N / 8 + j]); // 16 + write_channel_intel(chaninfft3da[3], buf[6 * N / 8 + j]); // 48 + write_channel_intel(chaninfft3da[4], buf[N / 8 + j]); // 8 + write_channel_intel(chaninfft3da[5], buf[5 * N / 8 + j]); // 40 + write_channel_intel(chaninfft3da[6], buf[3 * N / 8 + j]); // 24 + write_channel_intel(chaninfft3da[7], buf[7 * N / 8 + j]); // 54 } } } @@ -56,7 +43,6 @@ kernel void fetch1(global volatile float2 * restrict src) { */ kernel void fft3da(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -65,147 +51,115 @@ kernel void fft3da(int inverse) { 
*/ float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for( int j = 0; j < N; j++){ - - // needs to run "N / 8 - 1" additional iterations to drain the last outputs - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft1[0]); - data.i1 = read_channel_intel(chaninfft1[1]); - data.i2 = read_channel_intel(chaninfft1[2]); - data.i3 = read_channel_intel(chaninfft1[3]); - data.i4 = read_channel_intel(chaninfft1[4]); - data.i5 = read_channel_intel(chaninfft1[5]); - data.i6 = read_channel_intel(chaninfft1[6]); - data.i7 = read_channel_intel(chaninfft1[7]); - } - else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3da[0]); + data.i1 = read_channel_intel(chaninfft3da[1]); + data.i2 = read_channel_intel(chaninfft3da[2]); + data.i3 = read_channel_intel(chaninfft3da[3]); + data.i4 = read_channel_intel(chaninfft3da[4]); + data.i5 = read_channel_intel(chaninfft3da[5]); + data.i6 = read_channel_intel(chaninfft3da[6]); + data.i7 = read_channel_intel(chaninfft3da[7]); + } + else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chanoutfft1[0], data.i0); - write_channel_intel(chanoutfft1[1], data.i1); - write_channel_intel(chanoutfft1[2], data.i2); - write_channel_intel(chanoutfft1[3], data.i3); - write_channel_intel(chanoutfft1[4], data.i4); - write_channel_intel(chanoutfft1[5], data.i5); - write_channel_intel(chanoutfft1[6], data.i6); - 
write_channel_intel(chanoutfft1[7], data.i7); - } + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose[0], data.i0); + write_channel_intel(chaninTranspose[1], data.i1); + write_channel_intel(chaninTranspose[2], data.i2); + write_channel_intel(chaninTranspose[3], data.i3); + write_channel_intel(chaninTranspose[4], data.i4); + write_channel_intel(chaninTranspose[5], data.i5); + write_channel_intel(chaninTranspose[6], data.i6); + write_channel_intel(chaninTranspose[7], data.i7); } - } + } + } } __attribute__((max_global_work_dim(0))) -kernel void transpose1() { - const unsigned N = (1 << LOGN); - const unsigned DEPTH = (1 << (LOGN + LOGN - LOGPOINTS)); - - // iterate over N 2d matrices - for(unsigned k = 0 ; k < N; k++){ - - // Buffer with width - 8 points, depth - (N*N / 8), banked column-wise - float2 buf[DEPTH][POINTS]; - - // iterate within a 2d matrix - for(unsigned row = 0; row < N; row++){ - - // Temporary buffer to rotate before filling the matrix - //float2 rotate_in[POINTS]; - float2 bitrev[N]; - - // bit-reversed ordered input stored in normal order - for(unsigned j = 0; j < (N / 8); j++){ - bitrev[j] = read_channel_intel(chanoutfft1[0]); // 0 - bitrev[4 * N / 8 + j] = read_channel_intel(chanoutfft1[1]); // 32 - bitrev[2 * N / 8 + j] = read_channel_intel(chanoutfft1[2]); // 16 - bitrev[6 * N / 8 + j] = read_channel_intel(chanoutfft1[3]); // 48 - bitrev[N / 8 + j] = read_channel_intel(chanoutfft1[4]); // 8 - bitrev[5 * N / 8 + j] = read_channel_intel(chanoutfft1[5]); // 40 - bitrev[3 * N / 8 + j] = read_channel_intel(chanoutfft1[6]); // 24 - bitrev[7 * N / 8 + j] = read_channel_intel(chanoutfft1[7]); // 54 - } - - /* For each outer loop iteration, N data items are processed. - * These N data items should reside in N/8 rows in buf. 
- * Each of this N/8 rows are rotated by 1 - * Considering BRAM is POINTS wide, rotations should wrap around at POINTS - * row & (POINTS - 1) - */ - unsigned rot = row & (POINTS - 1); - - // fill the POINTS wide row of the buffer each iteration - // N/8 rows filled with the same rotation - for(unsigned j = 0; j < N / 8; j++){ - - float2 rotate_in[POINTS]; - #pragma unroll 8 - for(unsigned i = 0; i < POINTS; i++){ - rotate_in[i] = bitrev[(j * POINTS) + i]; - } - - #pragma unroll 8 - for(unsigned i = 0; i < 8; i++){ - unsigned where = ((i + POINTS) - rot) & (POINTS - 1); - unsigned buf_row = (row * (N / 8)) + j; - buf[buf_row][i] = rotate_in[where]; - } - } +kernel void transpose() { + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 bitrev_in[2][N], bitrev_out[2][N] ; + //float2 bitrev_in[2][N] __attribute__((memory("MLAB"))); + + int initial_delay = DELAY + DELAY; // for each of the bitrev buffer + + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose[0]); + data.i1 = read_channel_intel(chaninTranspose[1]); + data.i2 = read_channel_intel(chaninTranspose[2]); + data.i3 = read_channel_intel(chaninTranspose[3]); + data.i4 = read_channel_intel(chaninTranspose[4]); + data.i5 = read_channel_intel(chaninTranspose[5]); + data.i6 = read_channel_intel(chaninTranspose[6]); + data.i7 = read_channel_intel(chaninTranspose[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; } - for(unsigned row = 0; row < N; row++){ - - float2 rotate_out[N]; - unsigned offset = 0; - - #pragma unroll 8 - for(unsigned j = 0; j < N; j++){ - unsigned rot = (DEPTH + j - row) << (LOGN - LOGPOINTS) & (DEPTH -1); - unsigned offset = row >> LOGPOINTS; - unsigned row_rotate = offset 
+ rot; - unsigned col_rotate = j & (POINTS - 1); - - rotate_out[j] = buf[row_rotate][col_rotate]; - } - - for(unsigned j = 0; j < N / 8; j++){ - unsigned rev = bit_reversed((j * POINTS), LOGN); - unsigned rot_out = row & (N - 1); - - unsigned chan0 = (rot_out + rev) & (N - 1); // 0 - unsigned chan1 = ((4 * N / 8) + rot_out + rev) & (N - 1); // 32 - unsigned chan2 = ((2 * N / 8) + rot_out + rev) & (N - 1); // 16 - unsigned chan3 = ((6 * N / 8) + rot_out + rev) & (N - 1); // 48 - unsigned chan4 = ((N / 8) + rot_out + rev) & (N - 1); // 8 - unsigned chan5 = ((5 * N / 8) + rot_out + rev) & (N - 1); // 40 - unsigned chan6 = ((3 * N / 8) + rot_out + rev) & (N - 1); // 24 - unsigned chan7 = ((7 * N / 8) + rot_out + rev) & (N - 1); // 56 - - write_channel_intel(chaninfft2[0], rotate_out[chan0]); // 0 - write_channel_intel(chaninfft2[1], rotate_out[chan1]); // 32 - write_channel_intel(chaninfft2[2], rotate_out[chan2]); // 16 - write_channel_intel(chaninfft2[3], rotate_out[chan3]); // 48 - write_channel_intel(chaninfft2[4], rotate_out[chan4]); // 8 - write_channel_intel(chaninfft2[5], rotate_out[chan5]); // 40 - write_channel_intel(chaninfft2[6], rotate_out[chan6]); // 24 - write_channel_intel(chaninfft2[7], rotate_out[chan7]); // 54 - } - } // row - - } // iter matrice + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, DELAY); + + data_out = readBuf( + is_bufA ? buf[1] : buf[0], + step); + + unsigned start_row = (step + DELAY) & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? 
bitrev_out[0] : bitrev_out[1], + is_bitrevA ? bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + + if (step >= (DEPTH)) { + write_channel_intel(chaninfft3db[0], data_out.i0); + write_channel_intel(chaninfft3db[1], data_out.i1); + write_channel_intel(chaninfft3db[2], data_out.i2); + write_channel_intel(chaninfft3db[3], data_out.i3); + write_channel_intel(chaninfft3db[4], data_out.i4); + write_channel_intel(chaninfft3db[5], data_out.i5); + write_channel_intel(chaninfft3db[6], data_out.i6); + write_channel_intel(chaninfft3db[7], data_out.i7); + } + } } kernel void fft3db(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -221,14 +175,14 @@ kernel void fft3db(int inverse) { // Read data from channels if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft2[0]); - data.i1 = read_channel_intel(chaninfft2[1]); - data.i2 = read_channel_intel(chaninfft2[2]); - data.i3 = read_channel_intel(chaninfft2[3]); - data.i4 = read_channel_intel(chaninfft2[4]); - data.i5 = read_channel_intel(chaninfft2[5]); - data.i6 = read_channel_intel(chaninfft2[6]); - data.i7 = read_channel_intel(chaninfft2[7]); + data.i0 = read_channel_intel(chaninfft3db[0]); + data.i1 = read_channel_intel(chaninfft3db[1]); + data.i2 = read_channel_intel(chaninfft3db[2]); + data.i3 = read_channel_intel(chaninfft3db[3]); + data.i4 = read_channel_intel(chaninfft3db[4]); + data.i5 = read_channel_intel(chaninfft3db[5]); + data.i6 = read_channel_intel(chaninfft3db[6]); + data.i7 = read_channel_intel(chaninfft3db[7]); } else { data.i0 = data.i1 = data.i2 = data.i3 = data.i4 = data.i5 = data.i6 = data.i7 = 0; @@ -239,160 +193,84 @@ kernel void fft3db(int inverse) { // Write result to channels if (i >= N / POINTS - 1) { - write_channel_intel(chanoutfft2[0], data.i0); - write_channel_intel(chanoutfft2[1], data.i1); - write_channel_intel(chanoutfft2[2], 
data.i2); - write_channel_intel(chanoutfft2[3], data.i3); - write_channel_intel(chanoutfft2[4], data.i4); - write_channel_intel(chanoutfft2[5], data.i5); - write_channel_intel(chanoutfft2[6], data.i6); - write_channel_intel(chanoutfft2[7], data.i7); + write_channel_intel(chaninTranStore1[0], data.i0); + write_channel_intel(chaninTranStore1[1], data.i1); + write_channel_intel(chaninTranStore1[2], data.i2); + write_channel_intel(chaninTranStore1[3], data.i3); + write_channel_intel(chaninTranStore1[4], data.i4); + write_channel_intel(chaninTranStore1[5], data.i5); + write_channel_intel(chaninTranStore1[6], data.i6); + write_channel_intel(chaninTranStore1[7], data.i7); } } } } __attribute__((max_global_work_dim(0))) -kernel void transpose2() { - const unsigned N = (1 << LOGN); - const unsigned DEPTH = (1 << (LOGN + LOGN - LOGPOINTS)); - - // iterate over N 2d matrices - for(unsigned k = 0 ; k < N; k++){ - - // Buffer with width - 8 points, depth - (N*N / 8), banked column-wise - float2 buf[DEPTH][POINTS]; - - // iterate within a 2d matrix - for(unsigned row = 0; row < N; row++){ - - // Temporary buffer to rotate before filling the matrix - //float2 rotate_in[POINTS]; - float2 bitrev[N]; - - // bit-reversed ordered input stored in normal order - for(unsigned j = 0; j < (N / 8); j++){ - bitrev[j] = read_channel_intel(chaninfft2[0]); // 0 - bitrev[4 * N / 8 + j] = read_channel_intel(chaninfft2[1]); // 32 - bitrev[2 * N / 8 + j] = read_channel_intel(chaninfft2[2]); // 16 - bitrev[6 * N / 8 + j] = read_channel_intel(chaninfft2[3]); // 48 - bitrev[N / 8 + j] = read_channel_intel(chaninfft2[4]); // 8 - bitrev[5 * N / 8 + j] = read_channel_intel(chaninfft2[5]); // 40 - bitrev[3 * N / 8 + j] = read_channel_intel(chaninfft2[6]); // 24 - bitrev[7 * N / 8 + j] = read_channel_intel(chaninfft2[7]); // 54 - } - - /* For each outer loop iteration, N data items are processed. - * These N data items should reside in N/8 rows in buf. 
- * Each of this N/8 rows are rotated by 1 - * Considering BRAM is POINTS wide, rotations should wrap around at POINTS - * row & (POINTS - 1) - */ - unsigned rot = row & (POINTS - 1); - - // fill the POINTS wide row of the buffer each iteration - // N/8 rows filled with the same rotation - for(unsigned j = 0; j < N / 8; j++){ - - float2 rotate_in[POINTS]; - #pragma unroll 8 - for(unsigned i = 0; i < POINTS; i++){ - rotate_in[i] = bitrev[(j * POINTS) + i]; - } - - #pragma unroll 8 - for(unsigned i = 0; i < 8; i++){ - unsigned where = ((i + POINTS) - rot) & (POINTS - 1); - unsigned buf_row = (row * (N / 8)) + j; - buf[buf_row][i] = rotate_in[where]; - } - } +kernel void transposeStore1(global float2 * restrict dest) { + + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranStore1[0]); + data.i1 = read_channel_intel(chaninTranStore1[1]); + data.i2 = read_channel_intel(chaninTranStore1[2]); + data.i3 = read_channel_intel(chaninTranStore1[3]); + data.i4 = read_channel_intel(chaninTranStore1[4]); + data.i5 = read_channel_intel(chaninTranStore1[5]); + data.i6 = read_channel_intel(chaninTranStore1[6]); + data.i7 = read_channel_intel(chaninTranStore1[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; } - - for(unsigned row = 0; row < N; row++){ - - float2 rotate_out[N]; - unsigned offset = 0; - - #pragma unroll 8 - for(unsigned j = 0; j < N; j++){ - unsigned rot = (DEPTH + j - row) << (LOGN - LOGPOINTS) & (DEPTH -1); - unsigned offset = row >> LOGPOINTS; - unsigned row_rotate = offset + 
rot; - unsigned col_rotate = j & (POINTS - 1); - - rotate_out[j] = buf[row_rotate][col_rotate]; - } - - for(unsigned j = 0; j < N / 8; j++){ - unsigned rev = bit_reversed((j * POINTS), LOGN); - unsigned rot_out = row & (N - 1); - - unsigned chan0 = (rot_out + rev) & (N - 1); // 0 - unsigned chan1 = ((4 * N / 8) + rot_out + rev) & (N - 1); // 32 - unsigned chan2 = ((2 * N / 8) + rot_out + rev) & (N - 1); // 16 - unsigned chan3 = ((6 * N / 8) + rot_out + rev) & (N - 1); // 48 - unsigned chan4 = ((N / 8) + rot_out + rev) & (N - 1); // 8 - unsigned chan5 = ((5 * N / 8) + rot_out + rev) & (N - 1); // 40 - unsigned chan6 = ((3 * N / 8) + rot_out + rev) & (N - 1); // 24 - unsigned chan7 = ((7 * N / 8) + rot_out + rev) & (N - 1); // 56 - - write_channel_intel(chanoutstore1[0], rotate_out[chan0]); // 0 - write_channel_intel(chanoutstore1[1], rotate_out[chan1]); // 32 - write_channel_intel(chanoutstore1[2], rotate_out[chan2]); // 16 - write_channel_intel(chanoutstore1[3], rotate_out[chan3]); // 48 - write_channel_intel(chanoutstore1[4], rotate_out[chan4]); // 8 - write_channel_intel(chanoutstore1[5], rotate_out[chan5]); // 40 - write_channel_intel(chanoutstore1[6], rotate_out[chan6]); // 24 - write_channel_intel(chanoutstore1[7], rotate_out[chan7]); // 54 - } - } // row - - } // iter matrice -} - -/* - * Input through channels in bit reversed format - */ -kernel void store1(global volatile float2 * restrict dest){ - const unsigned N = (1 << LOGN); - local float2 buf[N * N]; - - for(unsigned zdim = 0; zdim < N; zdim++){ - - // Store yx plane in buffer, ydim in bit reversed format - for(unsigned xdim = 0; xdim < N; xdim++){ - for(unsigned ydim = 0; ydim < (N / 8); ydim++){ - unsigned where = ((xdim * N) + (ydim * POINTS)); - - buf[where + 0] = read_channel_intel(chanoutfft2[0]); - buf[where + 1] = read_channel_intel(chanoutfft2[1]); - buf[where + 2] = read_channel_intel(chanoutfft2[2]); - buf[where + 3] = read_channel_intel(chanoutfft2[3]); - buf[where + 4] = 
read_channel_intel(chanoutfft2[4]); - buf[where + 5] = read_channel_intel(chanoutfft2[5]); - buf[where + 6] = read_channel_intel(chanoutfft2[6]); - buf[where + 7] = read_channel_intel(chanoutfft2[7]); - } - } // stored yx plane in buffer - - for(unsigned ydim = 0; ydim < N; ydim++){ - // bit reverse rows / ydim to get back normal order - unsigned revcolt = bit_reversed(ydim, LOGN); - - unsigned ddr_loc = (zdim * N * N) + (ydim * N); - - #pragma unroll 8 - for( unsigned xdim = 0; xdim < N; xdim++){ - dest[ddr_loc + xdim] = buf[(xdim * N) + revcolt]; - } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? 
buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned index = (step - DEPTH) * 8; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; } - } // stored N*N*N points in DDR + } } // Kernel that fetches data from global memory -kernel void fetch2(global volatile float2 * restrict src) { - const unsigned N = (1 << LOGN); +kernel void fetchBitrev2(global volatile float2 * restrict src) { local float2 buf[N * N]; for(unsigned ydim = 0; ydim < N; ydim++){ @@ -416,14 +294,14 @@ kernel void fetch2(global volatile float2 * restrict src) { for(unsigned k = 0; k < (N / 8); k++){ unsigned where = i + (k * N); - write_channel_intel(chaninfft3[0], buf[where]); // 0 - write_channel_intel(chaninfft3[1], buf[where + 4 * (N / 8) * N]); // 32 - write_channel_intel(chaninfft3[2], buf[where + 2 * (N / 8) * N]); // 16 - write_channel_intel(chaninfft3[3], buf[where + 6 * (N / 8) * N]); // 48 - write_channel_intel(chaninfft3[4], buf[where + (N / 8) * N]); // 8 - write_channel_intel(chaninfft3[5], buf[where + 5 * (N / 8) * N]); // 40 - write_channel_intel(chaninfft3[6], buf[where + 3 * (N / 8) * N]); // 24 - write_channel_intel(chaninfft3[7], buf[where + 7 * (N / 8) * N]); // 54 + write_channel_intel(chaninfft3dc[0], buf[where]); + write_channel_intel(chaninfft3dc[1], buf[where + 4 * (N / 8) * N]); + write_channel_intel(chaninfft3dc[2], buf[where + 2 * (N / 8) * N]); + write_channel_intel(chaninfft3dc[3], buf[where + 6 * (N / 8) * N]); + write_channel_intel(chaninfft3dc[4], buf[where + (N / 8) * N]); + write_channel_intel(chaninfft3dc[5], buf[where + 5 * (N / 8) * N]); + write_channel_intel(chaninfft3dc[6], buf[where + 3 * (N / 8) * N]); + write_channel_intel(chaninfft3dc[7], buf[where + 7 * (N / 8) * N]); } } } // y axis @@ -433,7 +311,6 @@ kernel void fetch2(global 
volatile float2 * restrict src) { * Input and output data in bit-reversed format */ kernel void fft3dc(int inverse) { - const int N = (1 << LOGN); /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -449,14 +326,14 @@ kernel void fft3dc(int inverse) { // Read data from channels if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft3[0]); - data.i1 = read_channel_intel(chaninfft3[1]); - data.i2 = read_channel_intel(chaninfft3[2]); - data.i3 = read_channel_intel(chaninfft3[3]); - data.i4 = read_channel_intel(chaninfft3[4]); - data.i5 = read_channel_intel(chaninfft3[5]); - data.i6 = read_channel_intel(chaninfft3[6]); - data.i7 = read_channel_intel(chaninfft3[7]); + data.i0 = read_channel_intel(chaninfft3dc[0]); + data.i1 = read_channel_intel(chaninfft3dc[1]); + data.i2 = read_channel_intel(chaninfft3dc[2]); + data.i3 = read_channel_intel(chaninfft3dc[3]); + data.i4 = read_channel_intel(chaninfft3dc[4]); + data.i5 = read_channel_intel(chaninfft3dc[5]); + data.i6 = read_channel_intel(chaninfft3dc[6]); + data.i7 = read_channel_intel(chaninfft3dc[7]); } else { data.i0 = data.i1 = data.i2 = data.i3 = data.i4 = data.i5 = data.i6 = data.i7 = 0; @@ -467,81 +344,98 @@ kernel void fft3dc(int inverse) { // Write result to channels if (i >= N / POINTS - 1) { - write_channel_intel(chanoutfft3[0], data.i0); - write_channel_intel(chanoutfft3[1], data.i1); - write_channel_intel(chanoutfft3[2], data.i2); - write_channel_intel(chanoutfft3[3], data.i3); - write_channel_intel(chanoutfft3[4], data.i4); - write_channel_intel(chanoutfft3[5], data.i5); - write_channel_intel(chanoutfft3[6], data.i6); - write_channel_intel(chanoutfft3[7], data.i7); + write_channel_intel(chaninTranStore2[0], data.i0); + write_channel_intel(chaninTranStore2[1], data.i1); + write_channel_intel(chaninTranStore2[2], data.i2); + write_channel_intel(chaninTranStore2[3], data.i3); + 
write_channel_intel(chaninTranStore2[4], data.i4); + write_channel_intel(chaninTranStore2[5], data.i5); + write_channel_intel(chaninTranStore2[6], data.i6); + write_channel_intel(chaninTranStore2[7], data.i7); } } } } -/* - * input through channels: transformed zx planes - * - values in the z axis is in bitreversed format - */ -kernel void store2(global float2 * restrict dest){ - - const unsigned N = (1 << LOGN); - - local float2 buf[N * N]; - - for(unsigned ydim = 0; ydim < N; ydim++){ - - /* - * Store zx plane in 2d buffer in bit reversed format - * - outer loop iterates rows - * - inner loop stores elements of each row / zdim in bursts of POINTS (8) - */ - for(unsigned xdim = 0; xdim < N; xdim++){ - for(unsigned zdim = 0; zdim < (N / 8); zdim++){ - - // xdim * N iterates through the 2nd dim, here x - unsigned where = ((xdim * N) + (zdim * POINTS)); - - buf[where + 0] = read_channel_intel(chanoutfft3[0]); - buf[where + 1] = read_channel_intel(chanoutfft3[1]); - buf[where + 2] = read_channel_intel(chanoutfft3[2]); - buf[where + 3] = read_channel_intel(chanoutfft3[3]); - buf[where + 4] = read_channel_intel(chanoutfft3[4]); - buf[where + 5] = read_channel_intel(chanoutfft3[5]); - buf[where + 6] = read_channel_intel(chanoutfft3[6]); - buf[where + 7] = read_channel_intel(chanoutfft3[7]); - - } - } // zx plane stored in buffer - - /* - * Transpose and bitreverse the zx plane in 2d buffer to xz, - * then store in global memory - * - outer loop iterates through the rows / zdim - * - inner loop iterates through each column - * - selects elements based from bit reversed indices - */ - for(unsigned zdim = 0; zdim < N; zdim++){ - - // write to ddr in planes of xz - unsigned ddr_loc = ( (ydim * N) + (zdim * N * N) ); - - /* - * Read column-wise in buffer as a transpose of zx to xz plane - * store in ddr row-wise (xdim) then zdim - * 1. bit reverse z axis - revcolt(z) - * 2. 
transpose zx to xz - xdim * N - * : combine both to read the bitreversed column directly - buf_loc - */ - unsigned revcolt = bit_reversed(zdim, LOGN); - - #pragma unroll 8 - for(unsigned xdim = 0; xdim < N; xdim++){ - unsigned buf_loc = revcolt + (xdim * N); - dest[ddr_loc + xdim] = buf[buf_loc]; - } - } // stored 2d buffer to ddr - - } // stored entire 3d points to ddr -} +__attribute__((max_global_work_dim(0))) +kernel void transposeStore2(global float2 * restrict dest) { + + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranStore2[0]); + data.i1 = read_channel_intel(chaninTranStore2[1]); + data.i2 = read_channel_intel(chaninTranStore2[2]); + data.i3 = read_channel_intel(chaninTranStore2[3]); + data.i4 = read_channel_intel(chaninTranStore2[4]); + data.i5 = read_channel_intel(chaninTranStore2[5]); + data.i6 = read_channel_intel(chaninTranStore2[6]); + data.i7 = read_channel_intel(chaninTranStore2[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? 
buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned start_index = (step - DEPTH); + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned zdim = (start_index >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (start_index >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // incremenet by 8 until N / 8 + unsigned xdim = (start_index * 8) & ( N - 1); + //unsigned index = (step - DEPTH) * 8; + + // increment by N*N*N + unsigned cube = LOGN + LOGN + LOGN - LOGPOINTS; + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (start_index >> cube); + //unsigned batch_index = 0; + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + //printf("start_index: %u, batch: %u, zim: %u, ydim: %u, xdim: %u, index: %u \n", start_index, batch_index, zdim, ydim, xdim, index); + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; + } + } +} \ No newline at end of file From efb3e202e1f89adf83ae68b60ce1de7c52a54261 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 5 Aug 2020 17:53:43 +0200 Subject: [PATCH 16/76] loop coalesced fft kernels --- kernels/fft3d/fft3d_ddr.cl | 139 ++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 70 deletions(-) diff --git a/kernels/fft3d/fft3d_ddr.cl b/kernels/fft3d/fft3d_ddr.cl index c6889e1..90135cd 100755 --- a/kernels/fft3d/fft3d_ddr.cl +++ b/kernels/fft3d/fft3d_ddr.cl @@ -168,42 +168,40 @@ kernel void fft3db(int inverse) { */ float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for( int j = 0; j < N; j++){ - - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N 
* (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft3db[0]); - data.i1 = read_channel_intel(chaninfft3db[1]); - data.i2 = read_channel_intel(chaninfft3db[2]); - data.i3 = read_channel_intel(chaninfft3db[3]); - data.i4 = read_channel_intel(chaninfft3db[4]); - data.i5 = read_channel_intel(chaninfft3db[5]); - data.i6 = read_channel_intel(chaninfft3db[6]); - data.i7 = read_channel_intel(chaninfft3db[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chaninTranStore1[0], data.i0); - write_channel_intel(chaninTranStore1[1], data.i1); - write_channel_intel(chaninTranStore1[2], data.i2); - write_channel_intel(chaninTranStore1[3], data.i3); - write_channel_intel(chaninTranStore1[4], data.i4); - write_channel_intel(chaninTranStore1[5], data.i5); - write_channel_intel(chaninTranStore1[6], data.i6); - write_channel_intel(chaninTranStore1[7], data.i7); - } + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3db[0]); + data.i1 = read_channel_intel(chaninfft3db[1]); + data.i2 = read_channel_intel(chaninfft3db[2]); + data.i3 = read_channel_intel(chaninfft3db[3]); + data.i4 = read_channel_intel(chaninfft3db[4]); + data.i5 = read_channel_intel(chaninfft3db[5]); + data.i6 = read_channel_intel(chaninfft3db[6]); + data.i7 = read_channel_intel(chaninfft3db[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; } - } + + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranStore1[0], data.i0); + write_channel_intel(chaninTranStore1[1], data.i1); 
+ write_channel_intel(chaninTranStore1[2], data.i2); + write_channel_intel(chaninTranStore1[3], data.i3); + write_channel_intel(chaninTranStore1[4], data.i4); + write_channel_intel(chaninTranStore1[5], data.i5); + write_channel_intel(chaninTranStore1[6], data.i6); + write_channel_intel(chaninTranStore1[7], data.i7); + } + } + } } __attribute__((max_global_work_dim(0))) @@ -319,42 +317,43 @@ kernel void fft3dc(int inverse) { */ float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for( int j = 0; j < N; j++){ - - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft3dc[0]); - data.i1 = read_channel_intel(chaninfft3dc[1]); - data.i2 = read_channel_intel(chaninfft3dc[2]); - data.i3 = read_channel_intel(chaninfft3dc[3]); - data.i4 = read_channel_intel(chaninfft3dc[4]); - data.i5 = read_channel_intel(chaninfft3dc[5]); - data.i6 = read_channel_intel(chaninfft3dc[6]); - data.i7 = read_channel_intel(chaninfft3dc[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chaninTranStore2[0], data.i0); - write_channel_intel(chaninTranStore2[1], data.i1); - write_channel_intel(chaninTranStore2[2], data.i2); - write_channel_intel(chaninTranStore2[3], data.i3); - write_channel_intel(chaninTranStore2[4], data.i4); - write_channel_intel(chaninTranStore2[5], data.i5); - write_channel_intel(chaninTranStore2[6], data.i6); - write_channel_intel(chaninTranStore2[7], data.i7); - } + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3dc[0]); + data.i1 = 
read_channel_intel(chaninfft3dc[1]); + data.i2 = read_channel_intel(chaninfft3dc[2]); + data.i3 = read_channel_intel(chaninfft3dc[3]); + data.i4 = read_channel_intel(chaninfft3dc[4]); + data.i5 = read_channel_intel(chaninfft3dc[5]); + data.i6 = read_channel_intel(chaninfft3dc[6]); + data.i7 = read_channel_intel(chaninfft3dc[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; } - } + + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranStore2[0], data.i0); + write_channel_intel(chaninTranStore2[1], data.i1); + write_channel_intel(chaninTranStore2[2], data.i2); + write_channel_intel(chaninTranStore2[3], data.i3); + write_channel_intel(chaninTranStore2[4], data.i4); + write_channel_intel(chaninTranStore2[5], data.i5); + write_channel_intel(chaninTranStore2[6], data.i6); + write_channel_intel(chaninTranStore2[7], data.i7); + } + } + } } __attribute__((max_global_work_dim(0))) From f8dc0827c8dfec3b967157bd6eb3c7852443084d Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 5 Aug 2020 22:32:39 +0200 Subject: [PATCH 17/76] fft3d: single loop dbl buffered fetch1 --- kernels/fft3d/fft3d_ddr.cl | 55 +++++++++++++++------- kernels/matrixTranspose/diagonal_bitrev.cl | 28 +++++++++++ 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/kernels/fft3d/fft3d_ddr.cl b/kernels/fft3d/fft3d_ddr.cl index 90135cd..4457863 100755 --- a/kernels/fft3d/fft3d_ddr.cl +++ b/kernels/fft3d/fft3d_ddr.cl @@ -16,24 +16,48 @@ channel float2 chaninTranStore2[POINTS] __attribute__((depth(POINTS))); // Kernel that fetches data from global memory kernel void fetchBitrev1(global volatile float2 * restrict src) { + unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bitrevA = false; - for(unsigned k = 0; k < (N * N); k++){ - float2 buf[N]; - - #pragma unroll 8 - for(unsigned i = 0; i < 
N; i++){ - buf[i & ((1<= delay) { + write_channel_intel(chaninfft3da[0], data.i0); + write_channel_intel(chaninfft3da[1], data.i1); + write_channel_intel(chaninfft3da[2], data.i2); + write_channel_intel(chaninfft3da[3], data.i3); + write_channel_intel(chaninfft3da[4], data.i4); + write_channel_intel(chaninfft3da[5], data.i5); + write_channel_intel(chaninfft3da[6], data.i6); + write_channel_intel(chaninfft3da[7], data.i7); } } } @@ -41,7 +65,6 @@ kernel void fetchBitrev1(global volatile float2 * restrict src) { /* This single work-item task wraps the FFT engine * 'inverse' toggles between the direct and the inverse transform */ - kernel void fft3da(int inverse) { /* The FFT engine requires a sliding window for data reordering; data stored diff --git a/kernels/matrixTranspose/diagonal_bitrev.cl b/kernels/matrixTranspose/diagonal_bitrev.cl index 1bea1d8..3f0654f 100644 --- a/kernels/matrixTranspose/diagonal_bitrev.cl +++ b/kernels/matrixTranspose/diagonal_bitrev.cl @@ -1,5 +1,33 @@ // Authors: Tobias Kenter, Arjun Ramaswami +float2x8 bitreverse_fetch(float2x8 data, float2 bitrev_outA[N], float2 bitrev_outB[N], unsigned row){ + + const unsigned STEPS = (1 << (LOGN - LOGPOINTS)); + unsigned index = (row & (STEPS - 1)) * 8; + + bitrev_outA[index + 0] = data.i0; + bitrev_outA[index + 1] = data.i1; + bitrev_outA[index + 2] = data.i2; + bitrev_outA[index + 3] = data.i3; + bitrev_outA[index + 4] = data.i4; + bitrev_outA[index + 5] = data.i5; + bitrev_outA[index + 6] = data.i6; + bitrev_outA[index + 7] = data.i7; + + unsigned index_out = (row & (STEPS - 1)); + float2x8 rotate_out; + rotate_out.i0 = bitrev_outB[index_out]; + rotate_out.i1 = bitrev_outB[(4 * N / 8) + index_out]; + rotate_out.i2 = bitrev_outB[(2 * N / 8) + index_out]; + rotate_out.i3 = bitrev_outB[(6 * N / 8) + index_out]; + rotate_out.i4 = bitrev_outB[(N / 8) + index_out]; + rotate_out.i5 = bitrev_outB[(5 * N / 8) + index_out]; + rotate_out.i6 = bitrev_outB[(3 * N / 8) + index_out]; + rotate_out.i7 = 
bitrev_outB[(7 * N / 8) + index_out]; + + return rotate_out; +} + float2x8 bitreverse_out(float2 bitrev_outA[N], float2 bitrev_outB[N], float2x8 data, unsigned row){ float2 rotate_in[POINTS]; From a469f653884e8e0ba7df8ec540c727cc698b660a Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 6 Aug 2020 13:00:58 +0200 Subject: [PATCH 18/76] fft3d: fetch_transpose working --- kernels/fft3d/fft3d_ddr.cl | 114 ++++++++++++++------- kernels/matrixTranspose/diagonal_bitrev.cl | 27 +++++ 2 files changed, 102 insertions(+), 39 deletions(-) diff --git a/kernels/fft3d/fft3d_ddr.cl b/kernels/fft3d/fft3d_ddr.cl index 4457863..bd0d31a 100755 --- a/kernels/fft3d/fft3d_ddr.cl +++ b/kernels/fft3d/fft3d_ddr.cl @@ -118,8 +118,10 @@ kernel void transpose() { bool is_bufA = false, is_bitrevA = false; float2 buf[2][DEPTH][POINTS]; - float2 bitrev_in[2][N], bitrev_out[2][N] ; - //float2 bitrev_in[2][N] __attribute__((memory("MLAB"))); + //float2 bitrev_in[2][N], bitrev_out[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; int initial_delay = DELAY + DELAY; // for each of the bitrev buffer @@ -234,7 +236,8 @@ kernel void transposeStore1(global float2 * restrict dest) { bool is_bufA = false, is_bitrevA = false; float2 buf[2][DEPTH][POINTS]; - float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; int initial_delay = DELAY; // for each of the bitrev buffer // additional iterations to fill the buffers @@ -289,43 +292,77 @@ kernel void transposeStore1(global float2 * restrict dest) { } } } +__attribute__((max_global_work_dim(0))) +kernel void fetchBitrev2(global float2 * restrict src) { + unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 -// Kernel that fetches data from global memory -kernel void fetchBitrev2(global volatile float2 * restrict src) { - local float2 buf[N * N]; - - 
for(unsigned ydim = 0; ydim < N; ydim++){ - /* - * Store xz plane in the buffer - */ - for(unsigned i = 0; i < N; i++){ - unsigned ddr_loc = ( (i * N * N) + (ydim * N) ); - - #pragma unroll 8 - for(unsigned xdim = 0; xdim < N; xdim++){ - buf[(i * N) + xdim] = src[ddr_loc + xdim]; - } + bool is_bufA = false, is_bitrevA = false; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + float2 buf[2][DEPTH][POINTS]; + + // additional iterations to fill the buffers + for(unsigned step = 0; step < (N * DEPTH) + DEPTH + delay; step++){ + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned start_index = step + delay; + unsigned zdim = (step >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (step >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // increment by 8 until N / 8 + unsigned xdim = (step * 8) & (N - 1); + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (step >> (LOGN + LOGN + LOGN - LOGPOINTS)); + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + float2x8 data, data_out; + if (step < (N * DEPTH)) { + data.i0 = src[index + 0]; + data.i1 = src[index + 1]; + data.i2 = src[index + 2]; + data.i3 = src[index + 3]; + data.i4 = src[index + 4]; + data.i5 = src[index + 5]; + data.i6 = src[index + 6]; + data.i7 = src[index + 7]; + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; } + + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - /* Transpose xz plane i.e. 
zx - * Transfer bit reverse input to FFT - */ - for(unsigned i = 0; i < N; i++){ - - for(unsigned k = 0; k < (N / 8); k++){ - unsigned where = i + (k * N); - - write_channel_intel(chaninfft3dc[0], buf[where]); - write_channel_intel(chaninfft3dc[1], buf[where + 4 * (N / 8) * N]); - write_channel_intel(chaninfft3dc[2], buf[where + 2 * (N / 8) * N]); - write_channel_intel(chaninfft3dc[3], buf[where + 6 * (N / 8) * N]); - write_channel_intel(chaninfft3dc[4], buf[where + (N / 8) * N]); - write_channel_intel(chaninfft3dc[5], buf[where + 5 * (N / 8) * N]); - write_channel_intel(chaninfft3dc[6], buf[where + 3 * (N / 8) * N]); - write_channel_intel(chaninfft3dc[7], buf[where + 7 * (N / 8) * N]); - } + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_fetch( + is_bufA ? buf[1] : buf[0], + step, 0); + + unsigned start_row = step & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? 
bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + if (step >= (DEPTH + delay)) { + + write_channel_intel(chaninfft3dc[0], data_out.i0); + write_channel_intel(chaninfft3dc[1], data_out.i1); + write_channel_intel(chaninfft3dc[2], data_out.i2); + write_channel_intel(chaninfft3dc[3], data_out.i3); + write_channel_intel(chaninfft3dc[4], data_out.i4); + write_channel_intel(chaninfft3dc[5], data_out.i5); + write_channel_intel(chaninfft3dc[6], data_out.i6); + write_channel_intel(chaninfft3dc[7], data_out.i7); } - } // y axis + } } /* @@ -386,7 +423,8 @@ kernel void transposeStore2(global float2 * restrict dest) { bool is_bufA = false, is_bitrevA = false; float2 buf[2][DEPTH][POINTS]; - float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; int initial_delay = DELAY; // for each of the bitrev buffer // additional iterations to fill the buffers @@ -448,8 +486,6 @@ kernel void transposeStore2(global float2 * restrict dest) { unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; - //printf("start_index: %u, batch: %u, zim: %u, ydim: %u, xdim: %u, index: %u \n", start_index, batch_index, zdim, ydim, xdim, index); - dest[index + 0] = data_out.i0; dest[index + 1] = data_out.i1; dest[index + 2] = data_out.i2; diff --git a/kernels/matrixTranspose/diagonal_bitrev.cl b/kernels/matrixTranspose/diagonal_bitrev.cl index 3f0654f..6eec735 100644 --- a/kernels/matrixTranspose/diagonal_bitrev.cl +++ b/kernels/matrixTranspose/diagonal_bitrev.cl @@ -192,5 +192,32 @@ float2x8 readBuf_store(float2 buf[DEPTH][POINTS], unsigned step){ data.i6 = rotate_out[(6 + rot_out) & (POINTS - 1)]; data.i7 = rotate_out[(7 + rot_out) & (POINTS - 1)]; + return data; +} + +float2x8 readBuf_fetch(float2 buf[DEPTH][POINTS], unsigned step, unsigned delay){ + unsigned rows = (step + delay); + unsigned base = (rows & (N / POINTS - 1)) << LOGN; // 0, N, 2N, ... 
+ unsigned offset = (rows >> LOGN) & ((N / 8) - 1); // 0, .. N / POINTS + + float2 rotate_out[POINTS]; + float2x8 data; + + #pragma unroll POINTS + for(unsigned i = 0; i < POINTS; i++){ + unsigned rot = ((POINTS + i - (rows >> (LOGN - LOGPOINTS))) << (LOGN - LOGPOINTS)) & (N - 1); + unsigned row_rotate = (base + offset + rot); + rotate_out[i] = buf[row_rotate][i]; + } + + data.i0 = rotate_out[0]; + data.i1 = rotate_out[1]; + data.i2 = rotate_out[2]; + data.i3 = rotate_out[3]; + data.i4 = rotate_out[4]; + data.i5 = rotate_out[5]; + data.i6 = rotate_out[6]; + data.i7 = rotate_out[7]; + return data; } \ No newline at end of file From 8c090f8c448af6e9e658ed228692f29d1ec30c25 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 7 Aug 2020 12:57:29 +0200 Subject: [PATCH 19/76] Buffer location in stores, iteration wise measures - buffer location to enable DDR stores in SVM, - print iteration wise performance measurements - modify kernel names in host to match the kernel --- api/src/fftfpga.c | 8 ++++---- examples/common/helper.c | 5 ++++- examples/fft3d.c | 5 +++++ kernels/fft3d/fft3d_ddr.cl | 4 ++-- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 9602923..484b7bb 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -1056,7 +1056,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i int inverse_int = inv; // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetch1", &status); + cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); checkError(status, "Failed to create fetch1 kernel"); cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); checkError(status, "Failed to create fft3da kernel"); @@ -1064,14 +1064,14 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i checkError(status, "Failed to create transpose kernel"); cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); 
checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "store1", &status); + cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); checkError(status, "Failed to create store1 kernel"); - cl_kernel fetch2_kernel = clCreateKernel(program, "fetch2", &status); + cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); checkError(status, "Failed to create fetch2 kernel"); cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store2_kernel = clCreateKernel(program, "store2", &status); + cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); checkError(status, "Failed to create store2 kernel"); // Setup Queues to the kernels diff --git a/examples/common/helper.c b/examples/common/helper.c index 500b522..cdc1284 100755 --- a/examples/common/helper.c +++ b/examples/common/helper.c @@ -115,11 +115,14 @@ void display_measures(double total_api_time, double pcie_rd, double pcie_wr, dou printf("Points = %d%s \n", N, dim == 1 ? "" : dim == 2 ? "^2" : "^3"); printf("Precision = %s\n", sp==1 ? "Single": "Double"); printf("Direction = %s\n", inv ? "Backward":"Forward"); + printf("Iterations = %d\n", iter); + printf("%s", iter>1 ? 
"Average Measurements\n":""); printf("PCIe Write = %.2lfms\n", pcie_write); printf("Kernel Execution = %.2lfms\n", exec); printf("PCIe Read = %.2lfms\n", pcie_read); + printf("Total = %.2lfms\n", pcie_read + exec + pcie_write); printf("Throughput = %.2lfGFLOPS/s | %.2lf GB/s\n", gflops, gBytes_per_sec); - printf("Avg API runtime = %.2lfms\n", avg_api_time); + printf("API runtime = %.2lfms\n", avg_api_time); } /** diff --git a/examples/fft3d.c b/examples/fft3d.c index 96ab6fe..7f19a8d 100755 --- a/examples/fft3d.c +++ b/examples/fft3d.c @@ -119,6 +119,11 @@ int main(int argc, const char **argv) { avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + // destroy FFT input and output free(inp); free(out); diff --git a/kernels/fft3d/fft3d_ddr.cl b/kernels/fft3d/fft3d_ddr.cl index bd0d31a..60f6c7e 100755 --- a/kernels/fft3d/fft3d_ddr.cl +++ b/kernels/fft3d/fft3d_ddr.cl @@ -230,7 +230,7 @@ kernel void fft3db(int inverse) { } __attribute__((max_global_work_dim(0))) -kernel void transposeStore1(global float2 * restrict dest) { +kernel void transposeStore1(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict dest) { const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; @@ -293,7 +293,7 @@ kernel void transposeStore1(global float2 * restrict dest) { } } __attribute__((max_global_work_dim(0))) -kernel void fetchBitrev2(global float2 * restrict src) { +kernel void fetchBitrev2(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict src) { unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; From b7c457fe4c912e5109a0a0d7836ac5e3e8cf7ad7 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 12 Aug 2020 16:46:24 +0200 Subject: [PATCH 20/76] FFT2d: 
bram dbl buf and single loop --- api/include/fftfpga/fftfpga.h | 3 +- api/src/fftfpga.c | 67 +++--- examples/fft2d.c | 2 +- kernels/fft2d/fft2d_bram_opt.cl | 368 +++++++++++++------------------- tests/test_fft2d_fpga.cpp | 8 +- 5 files changed, 189 insertions(+), 259 deletions(-) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index fbc9542..bca32a7 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -96,9 +96,10 @@ extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter * @param out : float2 pointer to output data of size [N * N] * @param inv : int toggle to activate backward FFT * @param interleaving : 1 if interleaved global memory buffers + * @param how_many : number of 2D FFTs to computer, default 1 * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving); +extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving, int how_many); /** * @brief compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 484b7bb..84ca1fa 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -506,14 +506,14 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv){ * \param interleaving : 1 if interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving){ +fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving, int how_many){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; cl_kernel fetch_kernel = NULL, store_kernel = NULL; - cl_kernel transpose_kernel1 = NULL, transpose_kernel2 = NULL; + cl_kernel transpose_kernel = NULL; cl_int status = 
0; - int num_pts = N * N; + int num_pts = how_many * N * N; // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ @@ -535,6 +535,7 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl flagbuf1 = CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA; flagbuf2 = CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA; } + // Device memory buffers cl_mem d_inData, d_outData; d_inData = clCreateBuffer(context, flagbuf1, sizeof(float2) * num_pts, NULL, &status); @@ -559,30 +560,46 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl ffta_kernel = clCreateKernel(program, "fft2da", &status); checkError(status, "Failed to create fft2da kernel"); + fftb_kernel = clCreateKernel(program, "fft2db", &status); checkError(status, "Failed to create fft2db kernel"); - fetch_kernel = clCreateKernel(program, "fetchBitrev1", &status); + fetch_kernel = clCreateKernel(program, "fetchBitrev", &status); checkError(status, "Failed to create fetch kernel"); - transpose_kernel1 = clCreateKernel(program, "transpose1", &status); + transpose_kernel = clCreateKernel(program, "transpose", &status); checkError(status, "Failed to create transpose1 kernel"); - transpose_kernel2 = clCreateKernel(program, "transpose2", &status); - checkError(status, "Failed to create transpose2 kernel"); - - store_kernel = clCreateKernel(program, "store", &status); + store_kernel = clCreateKernel(program, "transposeStore", &status); checkError(status, "Failed to create store kernel"); status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set fetch kernel arg"); + checkError(status, "Failed to set fetch kernel arg 0"); + + status = clSetKernelArg(fetch_kernel, 1, sizeof(cl_int), (void *)&how_many); + checkError(status, "Failed to set fetch kernel arg 1"); + status = clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); + 
checkError(status, "Failed to set ffta kernel arg 0"); + + status = clSetKernelArg(ffta_kernel, 1, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set ffta kernel arg 1"); + + status = clSetKernelArg(transpose_kernel, 0, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set transpose kernel arg 0"); + status = clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); + checkError(status, "Failed to set fftb kernel arg 0"); + + status = clSetKernelArg(fftb_kernel, 1, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set fftb kernel arg 1"); + status = clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData); checkError(status, "Failed to set store kernel arg"); + status = clSetKernelArg(store_kernel, 1, sizeof(cl_int), (void *)&how_many); + checkError(status, "Failed to set store kernel arg"); + fft_time.exec_t = getTimeinMilliSec(); status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); @@ -590,31 +607,26 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue3, transpose_kernel1, 0, NULL, NULL); + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose1 kernel"); status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, transpose_kernel2, 0, NULL, NULL); - checkError(status, "Failed to launch transpose2 kernel"); - - status = clEnqueueTask(queue6, store_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue5, store_kernel, 0, NULL, NULL); checkError(status, "Failed to launch store kernel"); // Wait for all command queues to complete pending events status = 
clFinish(queue1); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue1"); status = clFinish(queue2); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue2"); status = clFinish(queue3); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue3"); status = clFinish(queue4); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue4"); status = clFinish(queue5); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue5"); fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; // Copy results from device to host @@ -638,11 +650,8 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl if(fftb_kernel) clReleaseKernel(fftb_kernel); - if(transpose_kernel1) - clReleaseKernel(transpose_kernel1); - - if(transpose_kernel2) - clReleaseKernel(transpose_kernel2); + if(transpose_kernel) + clReleaseKernel(transpose_kernel); if(store_kernel) clReleaseKernel(store_kernel); diff --git a/examples/fft2d.c b/examples/fft2d.c index bc789a5..3ce12d8 100644 --- a/examples/fft2d.c +++ b/examples/fft2d.c @@ -84,7 +84,7 @@ int main(int argc, const char **argv) { if(use_bram == 1){ // use bram for 2d Transpose temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_2d_bram(N, inp, out, inv, interleaving); + timing = fftfpgaf_c2c_2d_bram(N, inp, out, inv, interleaving, batch); total_api_time += getTimeinMilliseconds() - temp_timer; } else{ diff --git a/kernels/fft2d/fft2d_bram_opt.cl b/kernels/fft2d/fft2d_bram_opt.cl index a12781e..f37a465 100644 --- a/kernels/fft2d/fft2d_bram_opt.cl +++ b/kernels/fft2d/fft2d_bram_opt.cl @@ -8,39 +8,57 @@ channel float2 chaninfft2da[POINTS] __attribute__((depth(POINTS))); channel float2 chaninfft2db[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninTranspose1[POINTS] 
__attribute__((depth(POINTS))); -channel float2 chaninTranspose2[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninStore[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTransStore[POINTS] __attribute__((depth(POINTS))); -// Kernel that fetches data from global memory -kernel void fetchBitrev1(global volatile float2 * restrict src) { +kernel void fetchBitrev(global volatile float2 * restrict src, int batch) { + unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bitrevA = false; - for(unsigned k = 0; k < N; k++){ - float2 buf[N]; + float2 __attribute__((memory, numbanks(8))) buf[2][N]; + + // additional iterations to fill the buffers + for(unsigned step = 0; step < (batch * DEPTH) + delay; step++){ + + unsigned where = (step & ((N * DEPTH) - 1)) * 8; - #pragma unroll 8 - for(unsigned i = 0; i < N; i++){ - buf[i & ((1<= delay) { + write_channel_intel(chaninfft2da[0], data.i0); + write_channel_intel(chaninfft2da[1], data.i1); + write_channel_intel(chaninfft2da[2], data.i2); + write_channel_intel(chaninfft2da[3], data.i3); + write_channel_intel(chaninfft2da[4], data.i4); + write_channel_intel(chaninfft2da[5], data.i5); + write_channel_intel(chaninfft2da[6], data.i6); + write_channel_intel(chaninfft2da[7], data.i7); } } } -/* This single work-item task wraps the FFT engine - * 'inverse' toggles between the direct and the inverse transform - */ - -kernel void fft2da(int inverse) { +kernel void fft2da(int inverse, int batch) { /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -51,65 +69,70 @@ kernel void fft2da(int inverse) { float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; // needs to run "N / 8 - 1" additional iterations to drain the last outputs - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from 
channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft2da[0]); - data.i1 = read_channel_intel(chaninfft2da[1]); - data.i2 = read_channel_intel(chaninfft2da[2]); - data.i3 = read_channel_intel(chaninfft2da[3]); - data.i4 = read_channel_intel(chaninfft2da[4]); - data.i5 = read_channel_intel(chaninfft2da[5]); - data.i6 = read_channel_intel(chaninfft2da[6]); - data.i7 = read_channel_intel(chaninfft2da[7]); - } - else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } + #pragma loop_coalesce + for(unsigned j = 0; j < batch; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + // Read data from channels + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft2da[0]); + data.i1 = read_channel_intel(chaninfft2da[1]); + data.i2 = read_channel_intel(chaninfft2da[2]); + data.i3 = read_channel_intel(chaninfft2da[3]); + data.i4 = read_channel_intel(chaninfft2da[4]); + data.i5 = read_channel_intel(chaninfft2da[5]); + data.i6 = read_channel_intel(chaninfft2da[6]); + data.i7 = read_channel_intel(chaninfft2da[7]); + } + else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chaninTranspose1[0], data.i0); - write_channel_intel(chaninTranspose1[1], data.i1); - write_channel_intel(chaninTranspose1[2], data.i2); - write_channel_intel(chaninTranspose1[3], data.i3); - write_channel_intel(chaninTranspose1[4], data.i4); - write_channel_intel(chaninTranspose1[5], data.i5); - write_channel_intel(chaninTranspose1[6], data.i6); - write_channel_intel(chaninTranspose1[7], data.i7); + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + 
write_channel_intel(chaninTranspose[0], data.i0); + write_channel_intel(chaninTranspose[1], data.i1); + write_channel_intel(chaninTranspose[2], data.i2); + write_channel_intel(chaninTranspose[3], data.i3); + write_channel_intel(chaninTranspose[4], data.i4); + write_channel_intel(chaninTranspose[5], data.i5); + write_channel_intel(chaninTranspose[6], data.i6); + write_channel_intel(chaninTranspose[7], data.i7); + } } } } -__attribute__((max_global_work_dim(0))) -kernel void transpose1() { - const unsigned DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 +kernel void transpose(int batch) { + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; float2 buf[2][DEPTH][POINTS]; - float2 bitrev_in[2][N], bitrev_out[2][N] ; - //float2 bitrev_in[2][N] __attribute__((memory("MLAB"))); + //float2 bitrev_in[2][N], bitrev_out[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; int initial_delay = DELAY + DELAY; // for each of the bitrev buffer // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((DEPTH) + DEPTH); step++){ + for(int step = -initial_delay; step < ((batch * DEPTH) + DEPTH); step++){ + float2x8 data, data_out; - if (step < ((DEPTH) - initial_delay)) { - data.i0 = read_channel_intel(chaninTranspose1[0]); - data.i1 = read_channel_intel(chaninTranspose1[1]); - data.i2 = read_channel_intel(chaninTranspose1[2]); - data.i3 = read_channel_intel(chaninTranspose1[3]); - data.i4 = read_channel_intel(chaninTranspose1[4]); - data.i5 = read_channel_intel(chaninTranspose1[5]); - data.i6 = read_channel_intel(chaninTranspose1[6]); - data.i7 = read_channel_intel(chaninTranspose1[7]); + if (step < ((batch * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose[0]); + data.i1 = read_channel_intel(chaninTranspose[1]); + data.i2 = read_channel_intel(chaninTranspose[2]); + data.i3 = 
read_channel_intel(chaninTranspose[3]); + data.i4 = read_channel_intel(chaninTranspose[4]); + data.i5 = read_channel_intel(chaninTranspose[5]); + data.i6 = read_channel_intel(chaninTranspose[6]); + data.i7 = read_channel_intel(chaninTranspose[7]); } else { data.i0 = data.i1 = data.i2 = data.i3 = data.i4 = data.i5 = data.i6 = data.i7 = 0; @@ -155,7 +178,7 @@ kernel void transpose1() { } } -kernel void fft2db(int inverse) { +kernel void fft2db(int inverse, int batch) { /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -165,62 +188,67 @@ kernel void fft2db(int inverse) { float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft2db[0]); - data.i1 = read_channel_intel(chaninfft2db[1]); - data.i2 = read_channel_intel(chaninfft2db[2]); - data.i3 = read_channel_intel(chaninfft2db[3]); - data.i4 = read_channel_intel(chaninfft2db[4]); - data.i5 = read_channel_intel(chaninfft2db[5]); - data.i6 = read_channel_intel(chaninfft2db[6]); - data.i7 = read_channel_intel(chaninfft2db[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } + #pragma loop_coalesce + for(unsigned j = 0; j < 1; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + // Read data from channels + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft2db[0]); + data.i1 = read_channel_intel(chaninfft2db[1]); + data.i2 = read_channel_intel(chaninfft2db[2]); + data.i3 = read_channel_intel(chaninfft2db[3]); + data.i4 = read_channel_intel(chaninfft2db[4]); + data.i5 = read_channel_intel(chaninfft2db[5]); + data.i6 = read_channel_intel(chaninfft2db[6]); + data.i7 = read_channel_intel(chaninfft2db[7]); + } else { + data.i0 = 
data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chaninTranspose2[0], data.i0); - write_channel_intel(chaninTranspose2[1], data.i1); - write_channel_intel(chaninTranspose2[2], data.i2); - write_channel_intel(chaninTranspose2[3], data.i3); - write_channel_intel(chaninTranspose2[4], data.i4); - write_channel_intel(chaninTranspose2[5], data.i5); - write_channel_intel(chaninTranspose2[6], data.i6); - write_channel_intel(chaninTranspose2[7], data.i7); + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTransStore[0], data.i0); + write_channel_intel(chaninTransStore[1], data.i1); + write_channel_intel(chaninTransStore[2], data.i2); + write_channel_intel(chaninTransStore[3], data.i3); + write_channel_intel(chaninTransStore[4], data.i4); + write_channel_intel(chaninTransStore[5], data.i5); + write_channel_intel(chaninTransStore[6], data.i6); + write_channel_intel(chaninTransStore[7], data.i7); + } } } } -__attribute__((max_global_work_dim(0))) -kernel void transpose2() { +kernel void transposeStore(global volatile float2 * restrict dest, int batch) { + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; float2 buf[2][DEPTH][POINTS]; - float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; int initial_delay = DELAY; // for each of the bitrev buffer // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((DEPTH) + DEPTH); step++){ + for(int step = -initial_delay; step < ((batch * DEPTH) + DEPTH); step++){ + float2x8 data, data_out; - if (step < ((DEPTH) - 
initial_delay)) { - data.i0 = read_channel_intel(chaninTranspose2[0]); - data.i1 = read_channel_intel(chaninTranspose2[1]); - data.i2 = read_channel_intel(chaninTranspose2[2]); - data.i3 = read_channel_intel(chaninTranspose2[3]); - data.i4 = read_channel_intel(chaninTranspose2[4]); - data.i5 = read_channel_intel(chaninTranspose2[5]); - data.i6 = read_channel_intel(chaninTranspose2[6]); - data.i7 = read_channel_intel(chaninTranspose2[7]); + if (step < ((batch * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTransStore[0]); + data.i1 = read_channel_intel(chaninTransStore[1]); + data.i2 = read_channel_intel(chaninTransStore[2]); + data.i3 = read_channel_intel(chaninTransStore[3]); + data.i4 = read_channel_intel(chaninTransStore[4]); + data.i5 = read_channel_intel(chaninTransStore[5]); + data.i6 = read_channel_intel(chaninTransStore[6]); + data.i7 = read_channel_intel(chaninTransStore[7]); } else { data.i0 = data.i1 = data.i2 = data.i3 = data.i4 = data.i5 = data.i6 = data.i7 = 0; @@ -246,115 +274,9 @@ kernel void transpose2() { is_bufA ? 
buf[1] : buf[0], step); - if (step >= (DEPTH)) { - write_channel_intel(chaninStore[0], data_out.i0); - write_channel_intel(chaninStore[1], data_out.i1); - write_channel_intel(chaninStore[2], data_out.i2); - write_channel_intel(chaninStore[3], data_out.i3); - write_channel_intel(chaninStore[4], data_out.i4); - write_channel_intel(chaninStore[5], data_out.i5); - write_channel_intel(chaninStore[6], data_out.i6); - write_channel_intel(chaninStore[7], data_out.i7); - } - } -} - -kernel void store(global volatile float2 * restrict dest){ - - // perform N*N writes to buffer - for(unsigned i = 0; i < N; i++){ - for(unsigned j = 0; j < (N / 8); j++){ - unsigned where = ((i << LOGN) + (j << LOGPOINTS)); - - #pragma unroll 8 - for(unsigned u = 0; u < 8; u++){ - dest[where + u] = read_channel_intel(chaninStore[u]); - } - } - } -} -/* -kernel void store(global volatile float2 * restrict dest){ - unsigned revcolt, where; - - local float2 buf[N * N]; - - // perform N*N writes to buffer - for(unsigned i = 0; i < N; i++){ - for(unsigned j = 0; j < (N / 8); j++){ - where = ((i << LOGN) + (j << LOGPOINTS)); - - #pragma unroll 8 - for(unsigned u = 0; u < 8; u++){ - buf[where + u] = read_channel_intel(chaninStore[u]); - } - } - } - - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); - where = (i << LOGN); - - #pragma unroll 8 - for( unsigned u = 0; u < N; u++){ - dest[where + u] = buf[(u << LOGN) + revcolt]; - } - } -} -*/ -/* -kernel void store(global volatile float2 * restrict dest){ - const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 - bool is_bufA = false, is_bitrevA = false; - - float2 buf[2][DEPTH][POINTS]; - float2 bitrev_in[2][N], bitrev_out[2][N] ; - //float2 bitrev_in[2][N] __attribute__((memory("MLAB"))); - - int initial_delay = DELAY + DELAY; // for each of the bitrev buffer - - // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((DEPTH) + DEPTH); step++){ - - float2x8 data, data_out; - if (step < ((DEPTH) - 
initial_delay)) { - data.i0 = read_channel_intel(chaninStore[0]); - data.i1 = read_channel_intel(chaninStore[1]); - data.i2 = read_channel_intel(chaninStore[2]); - data.i3 = read_channel_intel(chaninStore[3]); - data.i4 = read_channel_intel(chaninStore[4]); - data.i5 = read_channel_intel(chaninStore[5]); - data.i6 = read_channel_intel(chaninStore[6]); - data.i7 = read_channel_intel(chaninStore[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Swap buffers every N*N/8 iterations - // starting from the additional delay of N/8 iterations - is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_in(data, - is_bitrevA ? bitrev_in[0] : bitrev_in[1], - is_bitrevA ? bitrev_in[1] : bitrev_in[0], - row); - - writeBuf(data, - is_bufA ? buf[0] : buf[1], - step); - - data_out = readBuf( - is_bufA ? 
buf[1] : buf[0], - step); - if (step >= (DEPTH)) { unsigned index = (step - DEPTH) * 8; - printf("Store index - %d step : %d \n", index, step); + dest[index + 0] = data_out.i0; dest[index + 1] = data_out.i1; dest[index + 2] = data_out.i2; @@ -365,6 +287,4 @@ kernel void store(global volatile float2 * restrict dest){ dest[index + 7] = data_out.i7; } } - printf("Store Completed\n"); -} -*/ \ No newline at end of file +} \ No newline at end of file diff --git a/tests/test_fft2d_fpga.cpp b/tests/test_fft2d_fpga.cpp index 18cab06..5c75180 100644 --- a/tests/test_fft2d_fpga.cpp +++ b/tests/test_fft2d_fpga.cpp @@ -25,15 +25,15 @@ TEST(fft2dFPGATest, InputValidityBRAM){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; // null inp ptr input - fft_time = fftfpgaf_c2c_2d_bram(64, NULL, test, 0, 0); + fft_time = fftfpgaf_c2c_2d_bram(64, NULL, test, 0, 0, 1); EXPECT_EQ(fft_time.valid, 0); // null out ptr input - fft_time = fftfpgaf_c2c_2d_bram(64, test, NULL, 0, 0); + fft_time = fftfpgaf_c2c_2d_bram(64, test, NULL, 0, 0, 1); EXPECT_EQ(fft_time.valid, 0); // if N not a power of 2 - fft_time = fftfpgaf_c2c_2d_bram(63, test, test, 0, 0); + fft_time = fftfpgaf_c2c_2d_bram(63, test, test, 0, 0, 1); EXPECT_EQ(fft_time.valid, 0); free(test); @@ -55,7 +55,7 @@ TEST(fft2dFPGATest, CorrectnessBRAM){ fftf_create_data(inp, N * N); - fft_time = fftfpgaf_c2c_2d_bram(N, inp, out, 0, 0); + fft_time = fftfpgaf_c2c_2d_bram(N, inp, out, 0, 0, 1); int result = verify_sp_fft2d_fftw(out, inp, N, 0); From eb91587bbf05c9831aa500ff4934e650e8f8d5fc Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 20 Aug 2020 11:46:25 +0200 Subject: [PATCH 21/76] correct svm buffer flags, init output buffer, fft1d iter --- api/src/fftfpga.c | 212 ++++++++++++++++++++++--------------- examples/common/helper.h | 2 +- examples/fft1d.c | 65 +++++++----- examples/fft2d.c | 7 +- examples/fft3d.c | 3 +- examples/fft3d_svm_batch.c | 2 +- 6 files changed, 176 insertions(+), 115 deletions(-) diff --git a/api/src/fftfpga.c 
b/api/src/fftfpga.c index 84ca1fa..28ddcc1 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -159,12 +159,12 @@ void fpga_final(){ * \param inp : double2 pointer to input data of size N * \param out : double2 pointer to output data of size N * \param inv : int toggle to activate backward FFT - * \param iter : int toggle to activate backward FFT + * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int iter){ +fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int batch){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_kernel kernel1 = NULL, kernel2 = NULL; + cl_kernel fetch_kernel = NULL, fft_kernel = NULL; cl_int status = 0; // if N is not a power of 2 @@ -173,26 +173,22 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int iter){ } #ifdef VERBOSE - printf("Launching%s FFT transform for %d iter \n", inv ? " inverse":"", iter); + printf("Launching%s FFT transform of %d batches \n", inv ? " inverse":"", batch); #endif queue_setup(); cl_mem d_inData, d_outData; - printf("Launching%s FFT transform for %d iter \n", inv ? 
" inverse":"", iter); - - // Create device buffers - assign the buffers in different banks for more efficient memory access - d_inData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(double2) * N * iter, NULL, &status); + d_inData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(double2) * N * batch, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - // TODO: check CL_CHANNEL_2_INTELFPGA - d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(double2) * N * iter, NULL, &status); + d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(double2) * N * batch, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(double2) * N * iter, inp, 0, NULL, NULL); + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(double2) * N * batch, inp, 0, NULL, NULL); fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; checkError(status, "Failed to copy data to device"); @@ -201,37 +197,36 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int iter){ int inverse_int = inv; // Create Kernels - names must match the kernel name in the original CL file - kernel1 = clCreateKernel(program, "fetch", &status); + fetch_kernel = clCreateKernel(program, "fetch", &status); checkError(status, "Failed to create fetch kernel"); - kernel2 = clCreateKernel(program, "fft1d", &status); + fft_kernel = clCreateKernel(program, "fft1d", &status); checkError(status, "Failed to create fft1d kernel"); // Set the kernel arguments // from here - status = clSetKernelArg(kernel1, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set kernel1 arg 0"); - status = clSetKernelArg(kernel2, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set kernel arg 0"); - 
status = clSetKernelArg(kernel2, 1, sizeof(cl_int), (void*)&iter); - checkError(status, "Failed to set kernel arg 1"); - status = clSetKernelArg(kernel2, 2, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set kernel arg 2"); + status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + checkError(status, "Failed to set fetch_kernel arg 0"); + status = clSetKernelArg(fft_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set fft_kernel arg 0"); + status = clSetKernelArg(fft_kernel, 1, sizeof(cl_int), (void*)&batch); + checkError(status, "Failed to set fft_kernel arg 1"); + status = clSetKernelArg(fft_kernel, 2, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fft_kernel arg 2"); printf(inverse_int ? "\tInverse FFT" : "\tFFT"); printf(" kernel initialization is complete.\n"); size_t ls = N/8; - size_t gs = iter * ls; + size_t gs = batch * ls; // Measure execution time fft_time.exec_t = getTimeinMilliSec(); - // Launch the kernel - we launch a single work item hence enqueue a task // FFT1d kernel is the SWI kernel - status = clEnqueueTask(queue1, kernel2, 0, NULL, NULL); + status = clEnqueueTask(queue1, fft_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft1d kernel"); - status = clEnqueueNDRangeKernel(queue2, kernel1, 1, NULL, &gs, &ls, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(queue2, fetch_kernel, 1, NULL, &gs, &ls, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); // Wait for command queue to complete pending events @@ -245,7 +240,7 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int iter){ // Copy results from device to host fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * iter, out, 0, NULL, NULL); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); fft_time.pcie_read_t = 
getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); @@ -254,10 +249,10 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int iter){ clReleaseMemObject(d_inData); if (d_outData) clReleaseMemObject(d_outData); - if(kernel1) - clReleaseKernel(kernel1); - if(kernel2) - clReleaseKernel(kernel2); + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(fft_kernel) + clReleaseKernel(fft_kernel); queue_cleanup(); fft_time.valid = 1; return fft_time; @@ -269,10 +264,10 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int iter){ * \param inp : float2 pointer to input data of size N * \param out : float2 pointer to output data of size N * \param inv : int toggle to activate backward FFT - * \param iter : int toggle to activate backward FFT + * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter){ +fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int batch){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel kernel1 = NULL, kernel2 = NULL; @@ -284,25 +279,25 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter){ } #ifdef VERBOSE - printf("Launching%s FFT transform for %d iter \n", inv ? " inverse":"", iter); + printf("Launching%s FFT transform for %d batch \n", inv ? " inverse":"", batch); #endif queue_setup(); cl_mem d_inData, d_outData; - printf("Launching%s FFT transform for %d iter \n", inv ? " inverse":"", iter); + printf("Launching%s FFT transform for %d batch \n", inv ? 
" inverse":"", batch); // Create device buffers - assign the buffers in different banks for more efficient memory access - d_inData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float2) * N * iter, NULL, &status); + d_inData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float2) * N * batch, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - d_outData = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * iter, NULL, &status); + d_outData = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * batch, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * iter, inp, 0, NULL, NULL); + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * batch, inp, 0, NULL, NULL); fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; checkError(status, "Failed to copy data to device"); @@ -321,7 +316,7 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter){ checkError(status, "Failed to set kernel1 arg 0"); status = clSetKernelArg(kernel2, 0, sizeof(cl_mem), (void *)&d_outData); checkError(status, "Failed to set kernel arg 0"); - status = clSetKernelArg(kernel2, 1, sizeof(cl_int), (void*)&iter); + status = clSetKernelArg(kernel2, 1, sizeof(cl_int), (void*)&batch); checkError(status, "Failed to set kernel arg 1"); status = clSetKernelArg(kernel2, 2, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set kernel arg 2"); @@ -330,7 +325,7 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter){ printf(" kernel initialization is complete.\n"); size_t ls = N/8; - size_t gs = iter * ls; + size_t gs = batch * ls; // Measure execution time fft_time.exec_t = getTimeinMilliSec(); @@ -354,7 +349,7 @@ 
fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter){ // Copy results from device to host fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * iter, out, 0, NULL, NULL); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); @@ -671,8 +666,9 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl */ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_kernel fft_kernel = NULL, fft_kernel_2 = NULL; - cl_kernel fetch_kernel = NULL, transpose_kernel = NULL, transpose_kernel_2 = NULL; + cl_kernel fft3da_kernel = NULL, fft3db_kernel = NULL, fft3dc_kernel = NULL; + cl_kernel fetch_kernel = NULL, store_kernel = NULL; + cl_kernel transpose_kernel = NULL, transpose3d_kernel = NULL; cl_int status = 0; // if N is not a power of 2 @@ -719,43 +715,60 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl // Create the kernel - name passed in here must match kernel name in the // original CL file, that was compiled into an AOCX file using the AOC tool - fft_kernel = clCreateKernel(program, "fft3da", &status); - checkError(status, "Failed to create fft3da kernel"); - fft_kernel_2 = clCreateKernel(program, "fft3db", &status); - checkError(status, "Failed to create fft3db kernel"); fetch_kernel = clCreateKernel(program, "fetch", &status); checkError(status, "Failed to create fetch kernel"); - transpose_kernel = clCreateKernel(program, "transpose", &status); + + fft3da_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + + transpose_kernel = clCreateKernel(program, "transpose2d", &status); checkError(status, "Failed to 
create transpose kernel"); - transpose_kernel_2 = clCreateKernel(program, "transpose3d", &status); - checkError(status, "Failed to create transpose3d kernel"); + + fft3db_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + + transpose3d_kernel = clCreateKernel(program, "transpose3D", &status); + checkError(status, "Failed to create transpose3D kernel"); + + fft3dc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + + store_kernel = clCreateKernel(program, "store", &status); + checkError(status, "Failed to create store kernel"); status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set kernel arg 0"); - status = clSetKernelArg(fft_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set kernel arg 1"); - status = clSetKernelArg(transpose_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set kernel arg 2"); - status = clSetKernelArg(fft_kernel_2, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set kernel arg 3"); + checkError(status, "Failed to set fetch kernel arg 0"); + status = clSetKernelArg(fft3da_kernel, 0, sizeof(cl_int),(void*)&inverse_int); + checkError(status, "Failed to set fft3da kernel arg 0"); + status = clSetKernelArg(fft3db_kernel, 0, sizeof(cl_int),(void*)&inverse_int); + checkError(status, "Failed to set fft3db_kernel arg 0"); + status = clSetKernelArg(fft3dc_kernel, 0, sizeof(cl_int),(void*)&inverse_int); + checkError(status, "Failed to set fft3dc_kernel arg 0"); + status = clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set store kernel arg 0"); fft_time.exec_t = getTimeinMilliSec(); status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); - // Launch the fft kernel - we launch a single 
work item hence enqueue a task - status = clEnqueueTask(queue2, fft_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue2, fft3da_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue4, fft_kernel_2, 0, NULL, NULL); + status = clEnqueueTask(queue4, fft3db_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, transpose_kernel_2, 0, NULL, NULL); + status = clEnqueueTask(queue5, transpose3d_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second transpose kernel"); + status = clEnqueueTask(queue6, fft3dc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch third fft kernel"); + + status = clEnqueueTask(queue7, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch store transpose kernel"); + // Wait for all command queues to complete pending events status = clFinish(queue1); checkError(status, "failed to finish"); @@ -767,6 +780,10 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl checkError(status, "failed to finish"); status = clFinish(queue5); checkError(status, "failed to finish"); + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue7); + checkError(status, "failed to finish"); fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; @@ -785,14 +802,18 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl if(fetch_kernel) clReleaseKernel(fetch_kernel); - if(fft_kernel) - clReleaseKernel(fft_kernel); - if(fft_kernel_2) - clReleaseKernel(fft_kernel_2); + if(fft3da_kernel) + clReleaseKernel(fft3da_kernel); + if(fft3db_kernel) + clReleaseKernel(fft3db_kernel); + if(fft3dc_kernel) + clReleaseKernel(fft3dc_kernel); if(transpose_kernel) clReleaseKernel(transpose_kernel); - if(transpose_kernel_2) - 
clReleaseKernel(transpose_kernel_2); + if(transpose3d_kernel) + clReleaseKernel(transpose3d_kernel); + if(store_kernel) + clReleaseKernel(store_kernel); fft_time.valid = 1; return fft_time; @@ -870,8 +891,8 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { float2 *h_inData, *h_outData; // allocate SVM buffers // Required outside the if stm so that compiler doesn't warm about uninitialized variables - h_inData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - h_outData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); if(svm_enabled){ @@ -886,6 +907,19 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); checkError(status, "Failed to unmap input data"); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_outData[i].x = 0.0; + h_outData[i].y = 0.0; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + // write to fetch kernel using SVM based PCIe status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); checkError(status, "Failed to set fetch1 kernel arg"); @@ -956,33 +990,27 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second transpose kernel"); - // Wait for all command queues to complete pending events - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - 
checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - - status = clEnqueueTask(queue1, fetch2_kernel, 0, NULL, NULL); + // enqueue fetch to same queue as the store kernel due to data dependency + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); - status = clEnqueueTask(queue2, fftc_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose kernel"); - status = clFinish(queue1); + status = clFinish(queue5); checkError(status, "failed to finish"); - status = clFinish(queue2); + status = clFinish(queue4); checkError(status, "failed to finish"); status = clFinish(queue3); checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; if(svm_enabled){ @@ -1099,8 +1127,8 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i float2 *h_inData[how_many], *h_outData[how_many]; for(size_t i = 0; i < how_many; i++){ - h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - h_outData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData[i] = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData[i], sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, 
"Failed to map input data"); @@ -1114,6 +1142,18 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i status = clEnqueueSVMUnmap(queue1, (void *)h_inData[i], 0, NULL, NULL); checkError(status, "Failed to unmap input data"); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t j = 0; j < num_pts; j++){ + h_outData[i][j].x = 0.0; + h_outData[i][j].y = 0.0; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData[i], 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); } /* diff --git a/examples/common/helper.h b/examples/common/helper.h index 6ec4a71..e72c2bf 100755 --- a/examples/common/helper.h +++ b/examples/common/helper.h @@ -12,7 +12,7 @@ bool fft_create_data(double2 *inp, int N); void print_config(int N, int dim, int iter, int inv, int sp, int batch, int use_bram); -void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, int N, int dim, int iter, int inv, int sp); +void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, int N, int dim, int iter, int batch, int inv, int sp); double getTimeinMilliseconds(); #endif // HELPER_H diff --git a/examples/fft1d.c b/examples/fft1d.c index 5663be9..e4bcf32 100644 --- a/examples/fft1d.c +++ b/examples/fft1d.c @@ -22,6 +22,8 @@ int main(int argc, const char **argv) { const char *platform = "Intel(R) FPGA"; fpga_t timing = {0.0, 0.0, 0.0, 0}; int use_svm = 0; + double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double temp_timer = 0.0, total_api_time = 0.0; bool status = true, use_emulator = false; struct argparse_option options[] = { @@ -65,40 +67,55 @@ int main(int argc, const char **argv) { return EXIT_SUCCESS; } else{ - size_t inp_sz = sizeof(float2) * N * iter; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = 
(float2*)fftfpgaf_complex_malloc(inp_sz); - - status = fftf_create_data(inp, N * iter); - if(!status){ + // find the average of iterations of batched 1D FFTs + // random data every iteration and every batch + for(size_t i = 0; i < iter; i++){ + + size_t inp_sz = sizeof(float2) * N * batch; + + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + status = fftf_create_data(inp, N * batch); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_1d(N, inp, out, inv, batch); + total_api_time += getTimeinMilliseconds() - temp_timer; + + // TODO: Verification of bit reversed output + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + // destroy FFT input and output free(inp); free(out); - return EXIT_FAILURE; } - - timing = fftfpgaf_c2c_1d(N, inp, out, inv, iter); - - free(inp); - free(out); } // destroy data fpga_final(); - if(timing.valid == 1){ - - if(timing.exec_t == 0.0){ - fprintf(stderr, "Invalid measurement. Execute kernel did not run\n"); - return EXIT_FAILURE; - } - display_measures(0.0, timing.pcie_read_t, timing.pcie_write_t, timing.exec_t, N, dim, iter, inv, sp); - } - else{ - fprintf(stderr, "Invalid timing measurement. 
Function returned prematurely\n"); - return EXIT_FAILURE; - } + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/examples/fft2d.c b/examples/fft2d.c index 3ce12d8..98f074f 100644 --- a/examples/fft2d.c +++ b/examples/fft2d.c @@ -113,6 +113,11 @@ int main(int argc, const char **argv) { avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + // destroy FFT input and output free(inp); free(out); @@ -123,7 +128,7 @@ int main(int argc, const char **argv) { fpga_final(); // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, inv, sp); + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } diff --git a/examples/fft3d.c b/examples/fft3d.c index 7f19a8d..f46244f 100755 --- a/examples/fft3d.c +++ b/examples/fft3d.c @@ -79,7 +79,6 @@ int main(int argc, const char **argv) { float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); status = fftf_create_data(inp, N * N * N); - printf("\n\n"); if(!status){ fprintf(stderr, "Error in Data Creation \n"); free(inp); @@ -134,7 +133,7 @@ int main(int argc, const char **argv) { fpga_final(); // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, inv, sp); + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } diff --git a/examples/fft3d_svm_batch.c b/examples/fft3d_svm_batch.c index a622ca8..c9efaac 100755 --- a/examples/fft3d_svm_batch.c +++ b/examples/fft3d_svm_batch.c @@ -121,7 +121,7 @@ int main(int argc, const char **argv) { fpga_final(); // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, 
N, dim, iter, inv, sp); + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } From 26187f845ab5196b5561410187aeadcc083a3f0d Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 20 Aug 2020 11:57:13 +0200 Subject: [PATCH 22/76] clFinish stms in DDR buffer reads --- api/src/fftfpga.c | 33 +++++++++++++++++++++++++++++++++ examples/common/helper.c | 6 ++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 28ddcc1..df5a7fc 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -190,6 +190,9 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int batch){ status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(double2) * N * batch, inp, 0, NULL, NULL); + status = clFinish(queue1); + checkError(status, "failed to finish writing buffer using PCIe"); + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; checkError(status, "Failed to copy data to device"); @@ -241,6 +244,10 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int batch){ // Copy results from device to host fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); @@ -299,6 +306,9 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int batch){ status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * batch, inp, 0, NULL, NULL); + status = clFinish(queue1); + checkError(status, "failed to finish writing buffer using PCIe"); + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; checkError(status, "Failed to copy data to device"); @@ -350,6 +360,10 @@ fpga_t 
fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int batch){ // Copy results from device to host fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); @@ -408,6 +422,9 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv){ status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N, inp, 0, NULL, NULL); + status = clFinish(queue1); + checkError(status, "failed to finish writing buffer using PCIe"); + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; checkError(status, "Failed to copy data to device"); @@ -470,6 +487,10 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv){ // Copy results from device to host fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); @@ -627,6 +648,10 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl // Copy results from device to host fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); @@ -790,6 +815,10 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, 
int interl // Copy results from device to host fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N * N, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); @@ -1036,6 +1065,10 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { // Copy results from device to host fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading DDR using PCIe"); + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); } diff --git a/examples/common/helper.c b/examples/common/helper.c index cdc1284..6dffed2 100755 --- a/examples/common/helper.c +++ b/examples/common/helper.c @@ -85,7 +85,7 @@ void print_config(int N, int dim, int iter, int inv, int sp, int batch, int use_ * \param inv: 1 if backward transform * \param single precision floating point transformation */ -void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec_t, int N, int dim, int iter, int inv, int sp){ +void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec_t, int N, int dim, int iter, int batch, int inv, int sp){ double avg_api_time = 0.0; @@ -116,7 +116,9 @@ void display_measures(double total_api_time, double pcie_rd, double pcie_wr, dou printf("Precision = %s\n", sp==1 ? "Single": "Double"); printf("Direction = %s\n", inv ? "Backward":"Forward"); printf("Iterations = %d\n", iter); - printf("%s", iter>1 ? "Average Measurements\n":""); + printf("Batch = %d\n", batch); + + printf("%s", iter>1 ? 
"Average Measurements of iterations\n":""); printf("PCIe Write = %.2lfms\n", pcie_write); printf("Kernel Execution = %.2lfms\n", exec); printf("PCIe Read = %.2lfms\n", pcie_read); From 6d3ec73083ce909b0a268433001c8914c9d3a055 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 20 Aug 2020 17:47:13 +0200 Subject: [PATCH 23/76] FFT3D: separated svm and ddr --- api/include/fftfpga/fftfpga.h | 2 + api/src/fftfpga.c | 310 +++++++++++++++++++++++----------- examples/CMakeLists.txt | 2 +- examples/fft3d.c | 2 - examples/fft3d_svm.c | 137 +++++++++++++++ 5 files changed, 354 insertions(+), 99 deletions(-) create mode 100755 examples/fft3d_svm.c diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index bca32a7..f63542f 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -132,6 +132,8 @@ extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int */ extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv); +extern fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, int inv); + extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, int how_many); diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index df5a7fc..fcd2e5c 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -849,7 +849,7 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl } /** - * \brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA + * \brief compute an out-of-place single precision complex 3D FFT using the DDR for 3D Transpose where the data access between the host and the FPGA is using Shared Virtual Memory (SVM) * \param N : integer pointer addressing the size of FFT3d * \param inp : float2 pointer to input data of size [N * N * N] * \param out : float2 pointer to output data of size [N * N * N] @@ -857,7 +857,7 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl * 
\param interleaving : 1 if using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { +fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, int inv) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; @@ -911,98 +911,235 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { queue_setup(); // Device memory buffers - cl_mem d_inData, d_outData; - d_inData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_inOutData; + d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - float2 *h_inData, *h_outData; // allocate SVM buffers - // Required outside the if stm so that compiler doesn't warm about uninitialized variables + float2 *h_inData, *h_outData; h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - if(svm_enabled){ + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_inData[i].x = inp[i].x; + h_inData[i].y = inp[i].y; + } - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - 
h_inData[i].x = inp[i].x; - h_inData[i].y = inp[i].y; - } + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); - status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_outData[i].x = 0.0; + h_outData[i].y = 0.0; + } - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_outData[i].x = 0.0; - h_outData[i].y = 0.0; - } + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); + checkError(status, "Failed to set fetch1 kernel arg"); - // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); - checkError(status, "Failed to set fetch1 kernel arg"); + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + + // kernel stores to DDR memory + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_inOutData); + checkError(status, "Failed to set store1 kernel arg"); - status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), 
(void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); + // kernel fetches from DDR memory + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_inOutData); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); - // kernel stores to DDR memory - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set store1 kernel arg"); + // kernel stores using SVM based PCIe to host + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData); + checkError(status, "Failed to set store2 kernel arg"); - // kernel fetches from DDR memory - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set fetch2 kernel arg"); - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); - // kernel stores using SVM based PCIe to host - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData); - checkError(status, "Failed to set store2 kernel arg"); + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose 
kernel"); + + // enqueue fetch to same queue as the store kernel due to data dependency + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + for(size_t i = 0; i < num_pts; i++){ + out[i].x = h_outData[i].x; + out[i].y = h_outData[i].y; } - else{ - // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); - status = clFinish(queue1); - checkError(status, "failed to finish"); + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); + queue_cleanup(); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set fetch1 kernel arg"); + if (d_inOutData) + clReleaseMemObject(d_inOutData); + + if(fetch1_kernel) + 
clReleaseKernel(fetch1_kernel); + if(fetch2_kernel) + clReleaseKernel(fetch2_kernel); - status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(fftc_kernel) + clReleaseKernel(fftc_kernel); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set store1 kernel arg"); + if(transpose_kernel) + clReleaseKernel(transpose_kernel); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set fetch2 kernel arg"); - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set store2 kernel arg"); + if(store1_kernel) + clReleaseKernel(store1_kernel); + if(store2_kernel) + clReleaseKernel(store2_kernel); + + fft_time.valid = 1; + return fft_time; +} + +/** + * \brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose + * \param N : integer pointer addressing the size of FFT3d + * \param inp : float2 pointer to input data of size [N * N * N] + * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param interleaving : 1 if using burst interleaved global memory buffers + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = N * N * N; + + // if N is not a power of 2 + if(inp == NULL || out 
== NULL || ( (N & (N-1)) !=0)){ + return fft_time; } +#ifdef VERBOSE + printf("Launching%s 3d FFT transform in DDR \n", inv ? " inverse":""); +#endif + + // Can't pass bool to device, so convert it to int + int inverse_int = inv; + + // Setup kernels + cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); + checkError(status, "Failed to create fetch1 kernel"); + cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose kernel"); + cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); + checkError(status, "Failed to create store1 kernel"); + + cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); + checkError(status, "Failed to create fetch2 kernel"); + cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); + checkError(status, "Failed to create store2 kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // Device memory buffers + cl_mem d_inData, d_transpose, d_outData; + d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_transpose = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device 
buffer\n"); + + // Copy data from host to device + fft_time.pcie_write_t = getTimeinMilliSec(); + + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish"); + + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; + checkError(status, "Failed to copy data to device"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set store2 kernel arg"); + fft_time.exec_t = getTimeinMilliSec(); status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); @@ -1042,36 +1179,15 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - if(svm_enabled){ - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, - (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map out data"); - - for(size_t i = 0; i < num_pts; i++){ - out[i].x = h_outData[i].x; - out[i].y = h_outData[i].y; - } - - status = clEnqueueSVMUnmap(queue1, 
(void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap out data"); - - if (h_inData) - clSVMFree(context, h_inData); - if (h_outData) - clSVMFree(context, h_outData); - - } - else{ - // Copy results from device to host - fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish reading DDR using PCIe"); + // Copy results from device to host + fft_time.pcie_read_t = getTimeinMilliSec(); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading DDR using PCIe"); - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - } + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; + checkError(status, "Failed to copy data from device"); queue_cleanup(); @@ -1079,6 +1195,8 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { clReleaseMemObject(d_inData); if (d_outData) clReleaseMemObject(d_outData); + if (d_transpose) + clReleaseMemObject(d_transpose); if(fetch1_kernel) clReleaseKernel(fetch1_kernel); diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 899e5a8..bc124c9 100755 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,7 +4,7 @@ project(examplesfftfpga VERSION 0.1 DESCRIPTION "Example Code that uses libfftfpga" LANGUAGES C CXX) -set(examples fft3d fft2d fft1d fft3d_svm_batch) +set(examples fft3d fft3d_svm fft3d_svm_batch fft2d fft1d) # create a target for each of the example foreach(example ${examples}) diff --git a/examples/fft3d.c b/examples/fft3d.c index f46244f..4ca5c77 100755 --- a/examples/fft3d.c +++ b/examples/fft3d.c @@ -34,8 +34,6 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('s',"sp", &sp, "Single 
Precision"), OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), - OPT_INTEGER('c',"batch", &batch, "Batch"), OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), diff --git a/examples/fft3d_svm.c b/examples/fft3d_svm.c new file mode 100755 index 0000000..ad96385 --- /dev/null +++ b/examples/fft3d_svm.c @@ -0,0 +1,137 @@ +// Author: Arjun Ramaswami + +#include +#include // EXIT_FAILURE +#include +#include + +#include "CL/opencl.h" +#include "fftfpga/fftfpga.h" + +#include "argparse.h" +#include "helper.h" +#include "verify_fftw.h" + +static const char *const usage[] = { + "bin/host [options]", + NULL, +}; + +int main(int argc, const char **argv) { + int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, batch = 1,interleaving = 0; + char *path = "fft3d_emulate.aocx"; + const char *platform; + fpga_t timing = {0.0, 0.0, 0.0, 0}; + int use_svm = 1; + double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double temp_timer = 0.0, total_api_time = 0.0; + bool status = true, use_emulator = false; + + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic Options"), + OPT_INTEGER('n',"n", &N, "FFT Points"), + OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), + OPT_INTEGER('i',"iter", &iter, "Iterations"), + OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), + OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), + OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), + OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and 
number of iterations are 1"); + argc = argparse_parse(&argparse, argc, argv); + + // Print to console the configuration chosen to execute during runtime + print_config(N, dim, iter, inv, sp, batch, use_bram); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + + int isInit = fpga_initialize(platform, path, use_svm); + if(isInit != 0){ + fprintf(stderr, "FPGA initialization error\n"); + return EXIT_FAILURE; + } + + if(sp == 0){ + printf("Not implemented. Work in Progress\n"); + return EXIT_SUCCESS; + } + else{ + for(size_t i = 0; i < iter; i++){ + + // create and destroy data every iteration + size_t inp_sz = sizeof(float2) * N * N * N; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + status = fftf_create_data(inp, N * N * N); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + if(use_bram == 1){ + // use bram for 3d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_3d_bram(N, inp, out, inv, interleaving); + total_api_time += getTimeinMilliseconds() - temp_timer; + } + else{ + // use ddr for 3d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_3d_ddr_svm(N, inp, out, inv); + total_api_time += getTimeinMilliseconds() - temp_timer; + } + +#ifdef USE_FFTW + if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ + fprintf(stderr, "3d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } +#endif + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", 
timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + // destroy FFT input and output + free(inp); + free(out); + } // iter + } // sp condition + + // destroy fpga state + fpga_final(); + + // display performance measures + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + + return EXIT_SUCCESS; +} From 2a0176b70ba7f322fd083ff54885085549d724f7 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 8 Sep 2020 15:06:50 +0200 Subject: [PATCH 24/76] FFT3D: Working opt bram transpose, bool instead of int --- api/include/fftfpga/fftfpga.h | 22 +- api/src/fftfpga.c | 182 ++++++- examples/CMakeLists.txt | 2 +- examples/common/helper.c | 22 +- examples/common/helper.h | 4 +- examples/common/verify_fftw.c | 8 +- examples/common/verify_fftw.h | 4 +- examples/fft1d.c | 93 ++-- examples/fft1d_svm.c | 113 ++++ examples/fft2d.c | 121 +++-- examples/{fft3d.c => fft3d_bram.c} | 113 ++-- examples/fft3d_ddr.c | 126 +++++ examples/fft3d_ddr_svm.c | 125 +++++ ...ft3d_svm_batch.c => fft3d_ddr_svm_batch.c} | 96 ++-- examples/fft3d_svm.c | 46 +- kernels/fft3d/CMakeLists.txt | 2 +- kernels/fft3d/fft3d_bram.cl | 2 +- kernels/fft3d/fft3d_bram_opt.cl | 495 ++++++++++++++++++ tests/test_fft1d_fpga.cpp | 3 +- tests/test_fft2d_fpga.cpp | 3 +- tests/test_fft3d_fpga.cpp | 3 +- tests/test_fft_setup.cpp | 9 +- 22 files changed, 1280 insertions(+), 314 deletions(-) create mode 100644 examples/fft1d_svm.c rename examples/{fft3d.c => fft3d_bram.c} (51%) create mode 100755 examples/fft3d_ddr.c create mode 100755 examples/fft3d_ddr_svm.c rename examples/{fft3d_svm_batch.c => fft3d_ddr_svm_batch.c} (57%) mode change 100755 => 100644 examples/fft3d_svm.c create mode 100755 kernels/fft3d/fft3d_bram_opt.cl diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index f63542f..4c46e8b 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -8,6 +8,8 
@@ #ifndef FFTFPGA_H #define FFTFPGA_H +#include + /** * Single Precision Complex Floating Point Data Structure */ @@ -46,7 +48,7 @@ typedef struct fpga_timing { -4 Failed to create program, file not found in path -5 Device does not support required SVM */ -extern int fpga_initialize(const char *platform_name, const char *path, int use_svm); +extern int fpga_initialize(const char *platform_name, const char *path, bool use_svm); /** * @brief Release FPGA Resources @@ -76,7 +78,9 @@ extern void* fftfpgaf_complex_malloc(size_t sz); * @param iter : number of iterations of the N point FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int iter); +extern fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int iter); + +extern fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch); /** * @brief compute an out-of-place single precision complex 1D-FFT on the FPGA @@ -87,7 +91,7 @@ extern fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int ite * @param iter : number of iterations of the N point FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter); +extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int iter); /** * @brief compute an out-of-place single precision complex 2D-FFT using the BRAM of the FPGA @@ -99,7 +103,7 @@ extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int iter * @param how_many : number of 2D FFTs to computer, default 1 * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving, int how_many); +extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, int interleaving, int how_many); /** * @brief 
compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA @@ -109,7 +113,7 @@ extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int * @param inv : int toggle to activate backward FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv); +extern fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv); /** * @brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA @@ -120,7 +124,7 @@ extern fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv); * @param interleaving : 1 if using burst interleaved global memory buffers * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving); +extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving); /** * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA @@ -130,11 +134,11 @@ extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int * @param inv : int toggle to activate backward FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv); +extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv); -extern fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, int inv); +extern fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv); -extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, int how_many); +extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, bool inv, int how_many); #endif diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index fcd2e5c..f763a23 100755 --- a/api/src/fftfpga.c +++ 
b/api/src/fftfpga.c @@ -4,6 +4,7 @@ #include #include #include +#include #define CL_VERSION_2_0 #include // to disable interleaving & transfer data to specific banks - CL_CHANNEL_1_INTELFPGA #include "CL/opencl.h" @@ -73,7 +74,7 @@ void* fftfpgaf_complex_malloc(size_t sz){ -5 Device does not support required SVM */ -int fpga_initialize(const char *platform_name, const char *path, int use_svm){ +int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ cl_int status = 0; #ifdef VERBOSE @@ -162,7 +163,7 @@ void fpga_final(){ * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int batch){ +fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel fetch_kernel = NULL, fft_kernel = NULL; cl_int status = 0; @@ -197,7 +198,7 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int batch){ checkError(status, "Failed to copy data to device"); // Can't pass bool to device, so convert it to int - int inverse_int = inv; + int inverse_int = (int)inv; // Create Kernels - names must match the kernel name in the original CL file fetch_kernel = clCreateKernel(program, "fetch", &status); @@ -270,11 +271,11 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, int inv, int batch){ * \param N : integer pointer to size of FFT3d * \param inp : float2 pointer to input data of size N * \param out : float2 pointer to output data of size N - * \param inv : int toggle to activate backward FFT + * \param inv : true for backward transforms * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int batch){ +fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int batch){ 
fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel kernel1 = NULL, kernel2 = NULL; @@ -295,10 +296,10 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int batch){ printf("Launching%s FFT transform for %d batch \n", inv ? " inverse":"", batch); // Create device buffers - assign the buffers in different banks for more efficient memory access - d_inData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float2) * N * batch, NULL, &status); + d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float2) * N * batch, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - d_outData = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * batch, NULL, &status); + d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * batch, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device @@ -313,7 +314,7 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int batch){ checkError(status, "Failed to copy data to device"); // Can't pass bool to device, so convert it to int - int inverse_int = inv; + int inverse_int = (int)inv; // Create Kernels - names must match the kernel name in the original CL file kernel1 = clCreateKernel(program, "fetch", &status); @@ -382,6 +383,130 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int batch){ return fft_time; } +/** + * \brief compute an out-of-place single precision complex 1D-FFT on the FPGA using Shared Virtual Memory for data transfers between host's main memory and FPGA + * \param N : integer pointer to size of FFT3d + * \param inp : float2 pointer to input data of size N + * \param out : float2 pointer to output data of size N + * \param inv : int toggle to activate backward FFT + * \param batch : number of batched executions of 1D FFT + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ 
+fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = N * N * N; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 3d FFT transform in DDR \n", inv ? " inverse":""); +#endif + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Setup kernels + cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch1 kernel"); + cl_kernel fft_kernel = clCreateKernel(program, "fft1d", &status); + checkError(status, "Failed to create fft3da kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // allocate SVM buffers + float2 *h_inData, *h_outData; + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_inData[i].x = inp[i].x; + h_inData[i].y = inp[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_outData[i].x = 0.0; + h_outData[i].y = 0.0; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData); 
+ checkError(status, "Failed to set fetch kernel arg"); + + // kernel transforms and stores to DDR memory + status = clSetKernelArgSVMPointer(fft_kernel, 0, (void *)h_outData); + checkError(status, "Failed to set store2 kernel arg"); + + status=clSetKernelArg(fft_kernel, 1, sizeof(cl_int), (void*)&batch); + checkError(status, "Failed to set fft kernel arg"); + + status=clSetKernelArg(fft_kernel, 2, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fft kernel arg"); + + size_t ls = N/8; + size_t gs = batch * ls; + + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fft_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueNDRangeKernel(queue2, fetch_kernel, 1, NULL, &gs, &ls, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + for(size_t i = 0; i < num_pts; i++){ + out[i].x = h_outData[i].x; + out[i].y = h_outData[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); + + queue_cleanup(); + + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + + if(fft_kernel) + clReleaseKernel(fft_kernel); + + fft_time.valid = 1; + return fft_time; +} + /** * \brief compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA * \param N : integer pointer to size of FFT2d @@ -391,7 +516,7 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, int inv, int batch){ * \param iter : int toggle to activate 
backward FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv){ +fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel fetch_kernel = NULL, fft_kernel = NULL, transpose_kernel = NULL; cl_int status = 0; @@ -410,9 +535,9 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv){ cl_mem d_inData, d_outData, d_tmp; - d_inData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float2) * N * N, NULL, &status); + d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float2) * N * N, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - d_outData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float2) * N * N, NULL, &status); + d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float2) * N * N, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); d_tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * N, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); @@ -429,7 +554,7 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv){ checkError(status, "Failed to copy data to device"); // Can't pass bool to device, so convert it to int - int inverse_int = inv; + int inverse_int = (int)inv; // Create Kernels - names must match the kernel name in the original CL file fft_kernel = clCreateKernel(program, "fft2d", &status); @@ -522,7 +647,7 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, int inv){ * \param interleaving : 1 if interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving, int how_many){ +fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, int interleaving, 
int how_many){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; cl_kernel fetch_kernel = NULL, store_kernel = NULL; @@ -548,8 +673,8 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl flagbuf2 = CL_MEM_READ_WRITE; } else{ - flagbuf1 = CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA; - flagbuf2 = CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA; + flagbuf1 = CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA; + flagbuf2 = CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA; } // Device memory buffers @@ -572,7 +697,7 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl checkError(status, "Failed to copy data to device"); // Can't pass bool to device, so convert it to int - int inverse_int = inv; + int inverse_int = (int)inv; ffta_kernel = clCreateKernel(program, "fft2da", &status); checkError(status, "Failed to create fft2da kernel"); @@ -689,12 +814,13 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, int inv, int interl * \param interleaving : 1 if using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interleaving) { +fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + cl_kernel fft3da_kernel = NULL, fft3db_kernel = NULL, fft3dc_kernel = NULL; cl_kernel fetch_kernel = NULL, store_kernel = NULL; cl_kernel transpose_kernel = NULL, transpose3d_kernel = NULL; - cl_int status = 0; // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ @@ -708,13 +834,13 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl queue_setup(); cl_mem_flags flagbuf1, flagbuf2; - if(interleaving == 1){ + if(interleaving){ flagbuf1 = CL_MEM_READ_WRITE; flagbuf2 = CL_MEM_READ_WRITE; } else{ - flagbuf1 = 
CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA; - flagbuf2 = CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA; + flagbuf1 = CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA; + flagbuf2 = CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA; } // Device memory buffers @@ -736,7 +862,7 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl checkError(status, "Failed to copy data to device"); // Can't pass bool to device, so convert it to int - int inverse_int = inv; + int inverse_int = (int)inv; // Create the kernel - name passed in here must match kernel name in the // original CL file, that was compiled into an AOCX file using the AOC tool @@ -857,7 +983,7 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, int inv, int interl * \param interleaving : 1 if using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, int inv) { +fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; @@ -886,7 +1012,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, int inv) { #endif // Can't pass bool to device, so convert it to int - int inverse_int = inv; + int inverse_int = (int)inv; // Setup kernels cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); @@ -1061,7 +1187,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, int inv) { * \param interleaving : 1 if using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { +fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; @@ -1076,7 +1202,7 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, 
float2 *out, int inv) { #endif // Can't pass bool to device, so convert it to int - int inverse_int = inv; + int inverse_int = (int)inv; // Setup kernels cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); @@ -1222,7 +1348,7 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, int inv) { return fft_time; } -fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, int how_many) { +fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, bool inv, int how_many) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; @@ -1241,7 +1367,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, int inv, i #endif // Can't pass bool to device, so convert it to int - int inverse_int = inv; + int inverse_int = (int)inv; // Setup kernels cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index bc124c9..00b269f 100755 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,7 +4,7 @@ project(examplesfftfpga VERSION 0.1 DESCRIPTION "Example Code that uses libfftfpga" LANGUAGES C CXX) -set(examples fft3d fft3d_svm fft3d_svm_batch fft2d fft1d) +set(examples fft3d_ddr fft3d_bram fft3d_ddr_svm fft3d_ddr_svm_batch fft2d fft1d fft1d_svm) # create a target for each of the example foreach(example ${examples}) diff --git a/examples/common/helper.c b/examples/common/helper.c index 6dffed2..6891648 100755 --- a/examples/common/helper.c +++ b/examples/common/helper.c @@ -56,22 +56,24 @@ bool fft_create_data(double2 *inp, int N){ * \param N: fft size * \param dim: number of dimensions of size * \param iter: number of iterations of each transformation (if BATCH mode) - * \param inv: 1, backward transform - * \param sp: 1, single precision floating point transformation - * \param use_bram: 1 if transpose uses BRAM, not DDR (valid for 2d and 3d FFT) + * \param inv: true for backward 
transform + * \param sp: true for single precision floating point transformation + * \param use_bram: true if transpose uses BRAM, not DDR (valid for 2d and 3d FFT) + * \param interleaving: true if data should be interleaved amongst the banks in DDR memory */ -void print_config(int N, int dim, int iter, int inv, int sp, int batch, int use_bram){ +void print_config(int N, int dim, int iter, bool inv, bool sp, int batch, bool use_bram, bool interleaving){ printf("\n------------------------------------------\n"); printf("FFT Configuration: \n"); printf("--------------------------------------------\n"); printf("Type = Complex to Complex\n"); printf("Points = %d%s \n", N, dim == 1 ? "" : dim == 2 ? "^2" : "^3"); - printf("Precision = %s \n", sp==1 ? "Single": "Double"); + printf("Precision = %s \n", sp ? "Single": "Double"); printf("Direction = %s \n", inv ? "Backward":"Forward"); printf("Placement = In Place \n"); printf("Batch = %d \n", batch); printf("Iterations = %d \n", iter); printf("Transpose = %s \n", use_bram ? "BRAM":"DDR"); + printf("Interleaving = %s \n", interleaving ? 
"Yes":"No"); printf("--------------------------------------------\n\n"); } @@ -82,10 +84,10 @@ void print_config(int N, int dim, int iter, int inv, int sp, int batch, int use_ * \param N: fft size * \param dim: number of dimensions of size * \param iter: number of iterations of each transformation (if BATCH mode) - * \param inv: 1 if backward transform + * \param inv: true if backward transform * \param single precision floating point transformation */ -void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec_t, int N, int dim, int iter, int batch, int inv, int sp){ +void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec_t, int N, int dim, int iter, int batch, bool inv, bool sp){ double avg_api_time = 0.0; @@ -100,7 +102,7 @@ void display_measures(double total_api_time, double pcie_rd, double pcie_wr, dou double gpoints_per_sec = (pow(N, dim) / (exec * 1e-3)) * 1e-9; double gBytes_per_sec = 0.0; - if(sp == 1){ + if(sp){ gBytes_per_sec = gpoints_per_sec * 8; // bytes } else{ @@ -109,11 +111,11 @@ void display_measures(double total_api_time, double pcie_rd, double pcie_wr, dou double gflops = dim * 5 * pow(N, dim) * (log((double)N)/log((double)2))/(exec * 1e-3 * 1E9); - printf("\n------------------------------------------\n"); + printf("\n\n------------------------------------------\n"); printf("Measurements \n"); printf("--------------------------------------------\n"); printf("Points = %d%s \n", N, dim == 1 ? "" : dim == 2 ? "^2" : "^3"); - printf("Precision = %s\n", sp==1 ? "Single": "Double"); + printf("Precision = %s\n", sp ? "Single": "Double"); printf("Direction = %s\n", inv ? 
"Backward":"Forward"); printf("Iterations = %d\n", iter); printf("Batch = %d\n", batch); diff --git a/examples/common/helper.h b/examples/common/helper.h index e72c2bf..8ea0ba7 100755 --- a/examples/common/helper.h +++ b/examples/common/helper.h @@ -10,9 +10,9 @@ bool fftf_create_data(float2 *inp, int N); bool fft_create_data(double2 *inp, int N); -void print_config(int N, int dim, int iter, int inv, int sp, int batch, int use_bram); +void print_config(int N, int dim, int iter, bool inv, bool sp, int batch, bool use_bram, bool interleaving); -void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, int N, int dim, int iter, int batch, int inv, int sp); +void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, int N, int dim, int iter, int batch, bool inv, bool sp); double getTimeinMilliseconds(); #endif // HELPER_H diff --git a/examples/common/verify_fftw.c b/examples/common/verify_fftw.c index bd57afe..bee2de4 100644 --- a/examples/common/verify_fftw.c +++ b/examples/common/verify_fftw.c @@ -11,11 +11,11 @@ * \param fpga_out: pointer to fpga computation of fft3d for sp complex data * \param fftw_data: pointer to fft3d sized allocation of sp complex data for fftw cpu computation * \param N: number of points per dimension of FFT3d - * \param inverse: 1 if inverse + * \param inverse: true if backward FFT * \param how_many: batch, default is 1 * \return true if verification passed */ -bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse, int how_many){ +bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse, int how_many){ // Copy inp data to verify using FFTW // requires allocating data specifically for FFTW computation @@ -91,10 +91,10 @@ bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse, i * \param fpga_out: pointer to fpga computation of fft2d for sp complex data * \param fftw_data: pointer to fft2d sized allocation of sp 
complex data for fftw cpu computation * \param N: number of points per dimension of FFT2d - * \param inverse: 1 if inverse + * \param inverse: true if backward FFT * \return true if verification passed */ -bool verify_sp_fft2d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse){ +bool verify_sp_fft2d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse){ // Copy inp data to verify using FFTW // requires allocating data specifically for FFTW computation diff --git a/examples/common/verify_fftw.h b/examples/common/verify_fftw.h index 76896c9..098f1f1 100644 --- a/examples/common/verify_fftw.h +++ b/examples/common/verify_fftw.h @@ -3,8 +3,8 @@ #ifndef FFT3D_FFTW_H #define FFT3D_FFTW_H -int verify_sp_fft2d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse); +int verify_sp_fft2d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse); -int verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, int inverse, int how_many); +int verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse, int how_many); #endif // FFT3D_FFTW_H \ No newline at end of file diff --git a/examples/fft1d.c b/examples/fft1d.c index e4bcf32..f45b0a3 100644 --- a/examples/fft1d.c +++ b/examples/fft1d.c @@ -17,14 +17,17 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 1, iter = 1, inv = 0, sp = 0, batch = 1, use_bram; + int N = 64, dim = 1, iter = 1, batch = 1; + + bool use_bram = false, sp = true, inv = false, use_svm = false, interleaving = false; + bool status = true, use_emulator = false; + char *path = "fft1d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; + fpga_t timing = {0.0, 0.0, 0.0, 0}; - int use_svm = 0; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; - bool status = true, use_emulator = false; struct argparse_option options[] = { OPT_HELP(), @@ -33,9 +36,7 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('s',"sp", &sp, "Single 
Precision"), OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), OPT_INTEGER('c',"batch", &batch, "Batch"), - OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_STRING('p', "path", &path, "Path to bitstream"), OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), OPT_END(), @@ -47,7 +48,7 @@ int main(int argc, const char **argv) { argc = argparse_parse(&argparse, argc, argv); // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram); + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; @@ -61,56 +62,48 @@ int main(int argc, const char **argv) { return EXIT_FAILURE; } - // Select based on dimensions and precisions different functions - if(sp == 0){ - printf("Not implemented. Work in Progress\n"); - return EXIT_SUCCESS; - } - else{ + size_t inp_sz = sizeof(float2) * N * batch; + + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - // find the average of iterations of batched 1D FFTs - // random data every iteration and every batch - for(size_t i = 0; i < iter; i++){ - - size_t inp_sz = sizeof(float2) * N * batch; - - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - status = fftf_create_data(inp, N * batch); - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_1d(N, inp, out, inv, batch); - total_api_time += getTimeinMilliseconds() - temp_timer; - - // TODO: Verification of bit reversed output - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - avg_rd += 
timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - // destroy FFT input and output + // find the average of iterations of batched 1D FFTs + // random data every iteration and every batch + for(size_t i = 0; i < iter; i++){ + + status = fftf_create_data(inp, N * batch); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); free(inp); free(out); + return EXIT_FAILURE; } + + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_1d(N, inp, out, inv, batch); + total_api_time += getTimeinMilliseconds() - temp_timer; + + // TODO: Verification of bit reversed output + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); } + // destroy FFT input and output + free(inp); + free(out); + // destroy data fpga_final(); diff --git a/examples/fft1d_svm.c b/examples/fft1d_svm.c new file mode 100644 index 0000000..ba020cf --- /dev/null +++ b/examples/fft1d_svm.c @@ -0,0 +1,113 @@ +// Author: Arjun Ramaswami + +#include +#include // EXIT_FAILURE +#include +#include + +#include "CL/opencl.h" +#include "fftfpga/fftfpga.h" + +#include "argparse.h" +#include "helper.h" + +static const char *const usage[] = { + "bin/host [options]", + NULL, +}; + +int main(int argc, const char **argv) { + int N = 64, dim = 1, iter = 1, batch = 1; + + bool use_bram = false, sp = true, inv = false, use_svm = true, interleaving = false; + bool status = true, use_emulator = false; + + char *path = "fft1d_emulate.aocx"; + 
const char *platform = "Intel(R) FPGA"; + + fpga_t timing = {0.0, 0.0, 0.0, 0}; + double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double temp_timer = 0.0, total_api_time = 0.0; + + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic Options"), + OPT_INTEGER('n',"n", &N, "FFT Points"), + OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), + OPT_INTEGER('i',"iter", &iter, "Iterations"), + OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), + OPT_INTEGER('c',"batch", &batch, "Batch"), + OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); + argc = argparse_parse(&argparse, argc, argv); + + // Print to console the configuration chosen to execute during runtime + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + } + + int isInit = fpga_initialize(platform, path, use_svm); + if(isInit != 0){ + return EXIT_FAILURE; + } + + size_t inp_sz = sizeof(float2) * N * batch; + + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + // find the average of iterations of batched 1D FFTs + // random data every iteration and every batch + for(size_t i = 0; i < iter; i++){ + + status = fftf_create_data(inp, N * batch); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_1d_svm(N, inp, out, inv, batch); + total_api_time += getTimeinMilliseconds() - temp_timer; + + // TODO: Verification of bit reversed output + 
if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + } + // destroy FFT input and output + free(inp); + free(out); + + // destroy data + fpga_final(); + + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/examples/fft2d.c b/examples/fft2d.c index 98f074f..cf4b0f4 100644 --- a/examples/fft2d.c +++ b/examples/fft2d.c @@ -18,14 +18,18 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 2, iter = 1, inv = 0, sp = 0, use_bram = 0, batch = 1, interleaving = 0; + int N = 64, dim = 2, iter = 1, batch = 1; + + bool use_bram = false, interleaving = false, sp = true, inv = false; + bool status = true, use_emulator = false; + bool use_svm = 0; + char *path = "fft2d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; + fpga_t timing = {0.0, 0.0, 0.0, 0}; - int use_svm = 0; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; - bool status = true, use_emulator = false; struct argparse_option options[] = { OPT_HELP(), @@ -49,7 +53,7 @@ int main(int argc, const char **argv) { argc = argparse_parse(&argparse, argc, argv); // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram); + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; @@ -63,66 +67,61 @@ int main(int argc, const char **argv) { return EXIT_FAILURE; } - if(sp == 0){ - printf("Not implemented. 
Work in Progress\n"); - return EXIT_SUCCESS; - } - else{ + size_t inp_sz = sizeof(float2) * N * N; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - for(size_t i = 0; i < iter; i++){ - size_t inp_sz = sizeof(float2) * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - status = fftf_create_data(inp, N * N); - if(!status){ - free(inp); - free(out); - return EXIT_FAILURE; - } - - if(use_bram == 1){ - // use bram for 2d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_2d_bram(N, inp, out, inv, interleaving, batch); - total_api_time += getTimeinMilliseconds() - temp_timer; - } - else{ - // use global memory for 2d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_2d_ddr(N, inp, out, inv); - total_api_time += getTimeinMilliseconds() - temp_timer; - } - - #ifdef USE_FFTW - if(!verify_sp_fft2d_fftw(out, inp, N, inv)){ - fprintf(stderr, "2d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - #endif - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - // destroy FFT input and output + for(size_t i = 0; i < iter; i++){ + + status = fftf_create_data(inp, N * N); + if(!status){ free(inp); free(out); - } // iter - } // sp condition + return EXIT_FAILURE; + } + + if(use_bram == 1){ + // use bram for 2d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_2d_bram(N, inp, out, inv, interleaving, batch); + total_api_time += getTimeinMilliseconds() - temp_timer; + } + else{ + // 
use global memory for 2d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_2d_ddr(N, inp, out, inv); + total_api_time += getTimeinMilliseconds() - temp_timer; + } + +#ifdef USE_FFTW + if(!verify_sp_fft2d_fftw(out, inp, N, inv)){ + fprintf(stderr, "2d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } +#endif + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + } // iter + + // destroy FFT input and output + free(inp); + free(out); // destroy fpga state fpga_final(); diff --git a/examples/fft3d.c b/examples/fft3d_bram.c similarity index 51% rename from examples/fft3d.c rename to examples/fft3d_bram.c index 4ca5c77..378d29f 100755 --- a/examples/fft3d.c +++ b/examples/fft3d_bram.c @@ -18,14 +18,18 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, batch = 1,interleaving = 0; + int N = 64, dim = 3, iter = 1, batch = 1; + + bool interleaving = false, use_bram = true, sp = true; + bool use_svm = false, inv = false; + bool status = true, use_emulator = false; + char *path = "fft3d_emulate.aocx"; const char *platform; + fpga_t timing = {0.0, 0.0, 0.0, 0}; - int use_svm = 0; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; - bool status = true, use_emulator = false; struct argparse_option options[] = { OPT_HELP(), @@ -34,7 +38,6 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - 
OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), @@ -47,7 +50,7 @@ int main(int argc, const char **argv) { argc = argparse_parse(&argparse, argc, argv); // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram); + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; @@ -64,68 +67,54 @@ int main(int argc, const char **argv) { return EXIT_FAILURE; } - if(sp == 0){ - printf("Not implemented. Work in Progress\n"); - return EXIT_SUCCESS; - } - else{ - for(size_t i = 0; i < iter; i++){ - - // create and destroy data every iteration - size_t inp_sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - status = fftf_create_data(inp, N * N * N); - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - if(use_bram == 1){ - // use bram for 3d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_bram(N, inp, out, inv, interleaving); - total_api_time += getTimeinMilliseconds() - temp_timer; - } - else{ - // use ddr for 3d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr(N, inp, out, inv); - total_api_time += getTimeinMilliseconds() - temp_timer; - } + size_t inp_sz = sizeof(float2) * N * N * N; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + for(size_t i = 0; i < iter; i++){ + + // create and destroy data every iteration + status = fftf_create_data(inp, N * N * N); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + 
free(inp); + free(out); + return EXIT_FAILURE; + } + + // use bram for 3d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_3d_bram(N, inp, out, inv, interleaving); + total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW - if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ - fprintf(stderr, "3d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } + if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ + fprintf(stderr, "3d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } #endif - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - // destroy FFT input and output + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); free(inp); free(out); - } // iter - } // sp condition + return EXIT_FAILURE; + } + + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + } // iter + // destroy FFT input and output + free(inp); + free(out); // destroy fpga state fpga_final(); diff --git a/examples/fft3d_ddr.c b/examples/fft3d_ddr.c new file mode 100755 index 0000000..a69fa8e --- /dev/null +++ b/examples/fft3d_ddr.c @@ -0,0 +1,126 @@ +// Author: Arjun Ramaswami + +#include +#include // EXIT_FAILURE +#include +#include + +#include "CL/opencl.h" +#include "fftfpga/fftfpga.h" + +#include "argparse.h" +#include "helper.h" +#include "verify_fftw.h" + +static const char *const 
usage[] = { + "bin/host [options]", + NULL, +}; + +int main(int argc, const char **argv) { + int N = 64, dim = 3, iter = 1, batch = 1; + + bool inv = false, sp = true; + bool use_bram = false, interleaving = false, use_svm = false; + bool status = true, use_emulator = false; + + char *path = "fft3d_emulate.aocx"; + const char *platform; + + fpga_t timing = {0.0, 0.0, 0.0, 0}; + double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double temp_timer = 0.0, total_api_time = 0.0; + + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic Options"), + OPT_INTEGER('n',"n", &N, "FFT Points"), + OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), + OPT_INTEGER('i',"iter", &iter, "Iterations"), + OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), + OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), + OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); + argc = argparse_parse(&argparse, argc, argv); + + // Print to console the configuration chosen to execute during runtime + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + + int isInit = fpga_initialize(platform, path, use_svm); + if(isInit != 0){ + fprintf(stderr, "FPGA initialization error\n"); + return EXIT_FAILURE; + } + + size_t inp_sz = sizeof(float2) * N * N * N; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + for(size_t i = 0; i < iter; i++){ + + // create and 
destroy data every iteration + status = fftf_create_data(inp, N * N * N); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + // use ddr for 3d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_3d_ddr(N, inp, out, inv); + total_api_time += getTimeinMilliseconds() - temp_timer; + +#ifdef USE_FFTW + if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ + fprintf(stderr, "3d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } +#endif + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + } // iter + // destroy FFT input and output + free(inp); + free(out); + + // destroy fpga state + fpga_final(); + + // display performance measures + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + + return EXIT_SUCCESS; +} diff --git a/examples/fft3d_ddr_svm.c b/examples/fft3d_ddr_svm.c new file mode 100755 index 0000000..59cc32d --- /dev/null +++ b/examples/fft3d_ddr_svm.c @@ -0,0 +1,125 @@ +// Author: Arjun Ramaswami + +#include +#include // EXIT_FAILURE +#include +#include + +#include "CL/opencl.h" +#include "fftfpga/fftfpga.h" + +#include "argparse.h" +#include "helper.h" +#include "verify_fftw.h" + +static const char *const usage[] = { + "bin/host [options]", + NULL, +}; + +int main(int argc, const char **argv) { + int N = 64, dim = 3, iter = 1, batch = 1; + + bool interleaving = false, use_bram = false, sp = true, inv = false; + bool status = true, use_emulator = false; + bool use_svm = true; + + char *path = "fft3d_emulate.aocx"; + const char *platform; + + 
fpga_t timing = {0.0, 0.0, 0.0, 0}; + double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double temp_timer = 0.0, total_api_time = 0.0; + + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic Options"), + OPT_INTEGER('n',"n", &N, "FFT Points"), + OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), + OPT_INTEGER('i',"iter", &iter, "Iterations"), + OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), + OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), + OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); + argc = argparse_parse(&argparse, argc, argv); + + // Print to console the configuration chosen to execute during runtime + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + + int isInit = fpga_initialize(platform, path, use_svm); + if(isInit != 0){ + fprintf(stderr, "FPGA initialization error\n"); + return EXIT_FAILURE; + } + + // create and destroy data every iteration + size_t inp_sz = sizeof(float2) * N * N * N; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + for(size_t i = 0; i < iter; i++){ + status = fftf_create_data(inp, N * N * N); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + // use ddr for 3d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_3d_ddr_svm(N, inp, out, inv); + total_api_time += 
getTimeinMilliseconds() - temp_timer; + +#ifdef USE_FFTW + if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ + fprintf(stderr, "3d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } +#endif + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + } // iter + + // destroy FFT input and output + free(inp); + free(out); + + // destroy fpga state + fpga_final(); + + // display performance measures + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + + return EXIT_SUCCESS; +} diff --git a/examples/fft3d_svm_batch.c b/examples/fft3d_ddr_svm_batch.c similarity index 57% rename from examples/fft3d_svm_batch.c rename to examples/fft3d_ddr_svm_batch.c index c9efaac..b7d43cf 100755 --- a/examples/fft3d_svm_batch.c +++ b/examples/fft3d_ddr_svm_batch.c @@ -18,14 +18,18 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, interleaving = 0, batch = 1; + int N = 64, dim = 3, iter = 1, batch = 1; + + bool inv = false, sp = true, use_bram = false, interleaving = false; + bool status = true, use_emulator = false; + bool use_svm = true; + char *path = "fft3d_emulate.aocx"; const char *platform; fpga_t timing = {0.0, 0.0, 0.0, 0}; - int use_svm = 0; + double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; - bool status = true, use_emulator = false; struct argparse_option options[] = { OPT_HELP(), @@ -34,9 +38,7 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), OPT_INTEGER('i',"iter", 
&iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), OPT_INTEGER('c',"batch", &batch, "Batch"), - OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), @@ -49,7 +51,7 @@ int main(int argc, const char **argv) { argc = argparse_parse(&argparse, argc, argv); // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram); + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; @@ -66,56 +68,50 @@ int main(int argc, const char **argv) { return EXIT_FAILURE; } - if(sp == 0){ - printf("Not implemented. Work in Progress\n"); - return EXIT_SUCCESS; - } - else{ - for(size_t i = 0; i < iter; i++){ - - // create and destroy data every iteration - size_t inp_sz = sizeof(float2) * N * N * N * batch; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - status = fftf_create_data(inp, N * N * N * batch); - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - // use ddr for 3d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr_svm_batch(N, inp, out, inv, batch); - total_api_time += getTimeinMilliseconds() - temp_timer; + // create and destroy data every iteration + size_t inp_sz = sizeof(float2) * N * N * N * batch; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + for(size_t i = 0; i < iter; i++){ + + status = fftf_create_data(inp, N * N * N * batch); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + free(inp); + free(out); + 
return EXIT_FAILURE; + } + + // use ddr for 3d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_3d_ddr_svm_batch(N, inp, out, inv, batch); + total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW - if(!verify_sp_fft3d_fftw(out, inp, N, inv, batch)){ - fprintf(stderr, "3d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } + if(!verify_sp_fft3d_fftw(out, inp, N, inv, batch)){ + fprintf(stderr, "3d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } #endif - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - - // destroy FFT input and output + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); free(inp); free(out); + return EXIT_FAILURE; + } + + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + + } // iter - } // iter - } // sp condition + // destroy FFT input and output + free(inp); + free(out); // destroy fpga state fpga_final(); diff --git a/examples/fft3d_svm.c b/examples/fft3d_svm.c old mode 100755 new mode 100644 index ad96385..b04ad30 --- a/examples/fft3d_svm.c +++ b/examples/fft3d_svm.c @@ -18,14 +18,18 @@ static const char *const usage[] = { }; int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, inv = 0, sp = 0, use_bram = 0, batch = 1,interleaving = 0; + int N = 64, dim = 3, iter = 1, batch = 1; + + bool interleaving = false, use_bram = false, sp = true, inv = false; + bool status = true, use_emulator = false; + bool use_svm = true; + char *path = "fft3d_emulate.aocx"; const char *platform; + fpga_t timing = {0.0, 0.0, 0.0, 0}; - int use_svm = 1; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; - bool status = true, use_emulator 
= false; struct argparse_option options[] = { OPT_HELP(), @@ -34,7 +38,6 @@ int main(int argc, const char **argv) { OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), @@ -47,7 +50,7 @@ int main(int argc, const char **argv) { argc = argparse_parse(&argparse, argc, argv); // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram); + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); if(use_emulator){ platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; @@ -69,13 +72,12 @@ int main(int argc, const char **argv) { return EXIT_SUCCESS; } else{ - for(size_t i = 0; i < iter; i++){ - - // create and destroy data every iteration - size_t inp_sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + // create and destroy data every iteration + size_t inp_sz = sizeof(float2) * N * N * N; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + for(size_t i = 0; i < iter; i++){ status = fftf_create_data(inp, N * N * N); if(!status){ fprintf(stderr, "Error in Data Creation \n"); @@ -84,18 +86,9 @@ int main(int argc, const char **argv) { return EXIT_FAILURE; } - if(use_bram == 1){ - // use bram for 3d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_bram(N, inp, out, inv, interleaving); - total_api_time += getTimeinMilliseconds() - temp_timer; - } - else{ - // use ddr for 3d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr_svm(N, inp, 
out, inv); - total_api_time += getTimeinMilliseconds() - temp_timer; - } + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_3d_ddr_svm(N, inp, out, inv); + total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ @@ -121,10 +114,11 @@ int main(int argc, const char **argv) { printf("\tKernel: %lfms\n", timing.exec_t); printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - // destroy FFT input and output - free(inp); - free(out); } // iter + + // destroy FFT input and output + free(inp); + free(out); } // sp condition // destroy fpga state diff --git a/kernels/fft3d/CMakeLists.txt b/kernels/fft3d/CMakeLists.txt index 326807e..69ba673 100644 --- a/kernels/fft3d/CMakeLists.txt +++ b/kernels/fft3d/CMakeLists.txt @@ -9,7 +9,7 @@ cmake_minimum_required(VERSION 3.10) # - ${kernel_name}_syn: to generate synthesis binary ## set(CL_PATH "${fftkernelsfpga_SOURCE_DIR}/fft3d") -set(kernels fft3d_bram fft3d_ddr fft3d_ddr_triv) +set(kernels fft3d_bram fft3d_bram_opt fft3d_ddr fft3d_ddr_triv) include(${fftkernelsfpga_SOURCE_DIR}/cmake/genKernelTargets.cmake) diff --git a/kernels/fft3d/fft3d_bram.cl b/kernels/fft3d/fft3d_bram.cl index d103403..e23ee64 100755 --- a/kernels/fft3d/fft3d_bram.cl +++ b/kernels/fft3d/fft3d_bram.cl @@ -248,7 +248,7 @@ kernel void fft3db(int inverse) { } // Stores data for 3rd dim FFT -kernel void transpose3d(){ +kernel void transpose3D(){ unsigned revcolt, where; unsigned where_test; diff --git a/kernels/fft3d/fft3d_bram_opt.cl b/kernels/fft3d/fft3d_bram_opt.cl new file mode 100755 index 0000000..3decfa0 --- /dev/null +++ b/kernels/fft3d/fft3d_bram_opt.cl @@ -0,0 +1,495 @@ +// Author: Arjun Ramaswami + +#include "fft_config.h" +#include "fft_8.cl" +#include "../matrixTranspose/diagonal_bitrev.cl" + +#pragma OPENCL EXTENSION cl_intel_channels : enable + +channel float2 chaninfft3da[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft3db[POINTS] 
__attribute__((depth(POINTS))); +channel float2 chaninfft3dc[POINTS] __attribute__((depth(POINTS))); + +channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranspose3D[POINTS] __attribute__((depth(POINTS))); + +channel float2 chaninTranStore[POINTS] __attribute__((depth(POINTS))); + +// Kernel that fetches data from global memory +kernel void fetch(global volatile float2 * restrict src) { + unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bitrevA = false; + + float2 __attribute__((memory, numbanks(8))) buf[2][N]; + + // additional iterations to fill the buffers + for(unsigned step = 0; step < (N * DEPTH) + delay; step++){ + + unsigned where = (step & ((N * DEPTH) - 1)) * 8; + + float2x8 data; + if (step < (N * DEPTH)) { + data.i0 = src[where + 0]; + data.i1 = src[where + 1]; + data.i2 = src[where + 2]; + data.i3 = src[where + 3]; + data.i4 = src[where + 4]; + data.i5 = src[where + 5]; + data.i6 = src[where + 6]; + data.i7 = src[where + 7]; + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_fetch(data, + is_bitrevA ? buf[0] : buf[1], + is_bitrevA ? 
buf[1] : buf[0], + row); + + if (step >= delay) { + write_channel_intel(chaninfft3da[0], data.i0); + write_channel_intel(chaninfft3da[1], data.i1); + write_channel_intel(chaninfft3da[2], data.i2); + write_channel_intel(chaninfft3da[3], data.i3); + write_channel_intel(chaninfft3da[4], data.i4); + write_channel_intel(chaninfft3da[5], data.i5); + write_channel_intel(chaninfft3da[6], data.i6); + write_channel_intel(chaninfft3da[7], data.i7); + } + } +} + +/* This single work-item task wraps the FFT engine + * 'inverse' toggles between the direct and the inverse transform + */ +kernel void fft3da(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3da[0]); + data.i1 = read_channel_intel(chaninfft3da[1]); + data.i2 = read_channel_intel(chaninfft3da[2]); + data.i3 = read_channel_intel(chaninfft3da[3]); + data.i4 = read_channel_intel(chaninfft3da[4]); + data.i5 = read_channel_intel(chaninfft3da[5]); + data.i6 = read_channel_intel(chaninfft3da[6]); + data.i7 = read_channel_intel(chaninfft3da[7]); + } + else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose[0], data.i0); + write_channel_intel(chaninTranspose[1], data.i1); + write_channel_intel(chaninTranspose[2], data.i2); + write_channel_intel(chaninTranspose[3], data.i3); + 
write_channel_intel(chaninTranspose[4], data.i4); + write_channel_intel(chaninTranspose[5], data.i5); + write_channel_intel(chaninTranspose[6], data.i6); + write_channel_intel(chaninTranspose[7], data.i7); + } + } + } +} + +kernel void transpose2d() { + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + //float2 bitrev_in[2][N], bitrev_out[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + + int initial_delay = DELAY + DELAY; // for each of the bitrev buffer + + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose[0]); + data.i1 = read_channel_intel(chaninTranspose[1]); + data.i2 = read_channel_intel(chaninTranspose[2]); + data.i3 = read_channel_intel(chaninTranspose[3]); + data.i4 = read_channel_intel(chaninTranspose[4]); + data.i5 = read_channel_intel(chaninTranspose[5]); + data.i6 = read_channel_intel(chaninTranspose[6]); + data.i7 = read_channel_intel(chaninTranspose[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, DELAY); + + data_out = readBuf( + is_bufA ? 
buf[1] : buf[0], + step); + + unsigned start_row = (step + DELAY) & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + + if (step >= (DEPTH)) { + write_channel_intel(chaninfft3db[0], data_out.i0); + write_channel_intel(chaninfft3db[1], data_out.i1); + write_channel_intel(chaninfft3db[2], data_out.i2); + write_channel_intel(chaninfft3db[3], data_out.i3); + write_channel_intel(chaninfft3db[4], data_out.i4); + write_channel_intel(chaninfft3db[5], data_out.i5); + write_channel_intel(chaninfft3db[6], data_out.i6); + write_channel_intel(chaninfft3db[7], data_out.i7); + } + } +} + +kernel void fft3db(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3db[0]); + data.i1 = read_channel_intel(chaninfft3db[1]); + data.i2 = read_channel_intel(chaninfft3db[2]); + data.i3 = read_channel_intel(chaninfft3db[3]); + data.i4 = read_channel_intel(chaninfft3db[4]); + data.i5 = read_channel_intel(chaninfft3db[5]); + data.i6 = read_channel_intel(chaninfft3db[6]); + data.i7 = read_channel_intel(chaninfft3db[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose3D[0], data.i0); + write_channel_intel(chaninTranspose3D[1], data.i1); + 
write_channel_intel(chaninTranspose3D[2], data.i2); + write_channel_intel(chaninTranspose3D[3], data.i3); + write_channel_intel(chaninTranspose3D[4], data.i4); + write_channel_intel(chaninTranspose3D[5], data.i5); + write_channel_intel(chaninTranspose3D[6], data.i6); + write_channel_intel(chaninTranspose3D[7], data.i7); + } + } + } +} + +kernel void transpose3D() { + + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + local float2 buf3D[N * N * N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose3D[0]); + data.i1 = read_channel_intel(chaninTranspose3D[1]); + data.i2 = read_channel_intel(chaninTranspose3D[2]); + data.i3 = read_channel_intel(chaninTranspose3D[3]); + data.i4 = read_channel_intel(chaninTranspose3D[4]); + data.i5 = read_channel_intel(chaninTranspose3D[5]); + data.i6 = read_channel_intel(chaninTranspose3D[6]); + data.i7 = read_channel_intel(chaninTranspose3D[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? 
buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned index = (step - DEPTH) * 8; + + buf3D[index + 0] = data_out.i0; + buf3D[index + 1] = data_out.i1; + buf3D[index + 2] = data_out.i2; + buf3D[index + 3] = data_out.i3; + buf3D[index + 4] = data_out.i4; + buf3D[index + 5] = data_out.i5; + buf3D[index + 6] = data_out.i6; + buf3D[index + 7] = data_out.i7; + } + } + + is_bufA = false; + is_bitrevA = false; + + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + + // additional iterations to fill the buffers + for(unsigned step = 0; step < (N * DEPTH) + DEPTH + DELAY; step++){ + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned start_index = step + DELAY; + unsigned zdim = (step >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (step >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // increment by 8 until N / 8 + unsigned xdim = (step * 8) & (N - 1); + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (step >> (LOGN + LOGN + LOGN - LOGPOINTS)); + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + float2x8 data, data_out; + if (step < (N * DEPTH)) { + data.i0 = buf3D[index + 0]; + data.i1 = buf3D[index + 1]; + data.i2 = buf3D[index + 2]; + data.i3 = buf3D[index + 3]; + data.i4 = buf3D[index + 4]; + data.i5 = buf3D[index + 5]; + data.i6 = buf3D[index + 6]; + data.i7 = buf3D[index + 7]; + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_fetch( + is_bufA ? buf[1] : buf[0], + step, 0); + + unsigned start_row = step & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? 
bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + if (step >= (DEPTH + DELAY)) { + + write_channel_intel(chaninfft3dc[0], data_out.i0); + write_channel_intel(chaninfft3dc[1], data_out.i1); + write_channel_intel(chaninfft3dc[2], data_out.i2); + write_channel_intel(chaninfft3dc[3], data_out.i3); + write_channel_intel(chaninfft3dc[4], data_out.i4); + write_channel_intel(chaninfft3dc[5], data_out.i5); + write_channel_intel(chaninfft3dc[6], data_out.i6); + write_channel_intel(chaninfft3dc[7], data_out.i7); + } + } +} + +/* + * Input and output data in bit-reversed format + */ +kernel void fft3dc(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3dc[0]); + data.i1 = read_channel_intel(chaninfft3dc[1]); + data.i2 = read_channel_intel(chaninfft3dc[2]); + data.i3 = read_channel_intel(chaninfft3dc[3]); + data.i4 = read_channel_intel(chaninfft3dc[4]); + data.i5 = read_channel_intel(chaninfft3dc[5]); + data.i6 = read_channel_intel(chaninfft3dc[6]); + data.i7 = read_channel_intel(chaninfft3dc[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranStore[0], data.i0); + write_channel_intel(chaninTranStore[1], data.i1); + write_channel_intel(chaninTranStore[2], data.i2); + 
write_channel_intel(chaninTranStore[3], data.i3); + write_channel_intel(chaninTranStore[4], data.i4); + write_channel_intel(chaninTranStore[5], data.i5); + write_channel_intel(chaninTranStore[6], data.i6); + write_channel_intel(chaninTranStore[7], data.i7); + } + } + } +} + +kernel void store(global float2 * restrict dest) { + + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 bitrev_in[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranStore[0]); + data.i1 = read_channel_intel(chaninTranStore[1]); + data.i2 = read_channel_intel(chaninTranStore[2]); + data.i3 = read_channel_intel(chaninTranStore[3]); + data.i4 = read_channel_intel(chaninTranStore[4]); + data.i5 = read_channel_intel(chaninTranStore[5]); + data.i6 = read_channel_intel(chaninTranStore[6]); + data.i7 = read_channel_intel(chaninTranStore[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? 
buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned start_index = (step - DEPTH); + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned zdim = (start_index >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (start_index >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // incremenet by 8 until N / 8 + unsigned xdim = (start_index * 8) & ( N - 1); + //unsigned index = (step - DEPTH) * 8; + + // increment by N*N*N + unsigned cube = LOGN + LOGN + LOGN - LOGPOINTS; + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (start_index >> cube); + //unsigned batch_index = 0; + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; + } + } +} \ No newline at end of file diff --git a/tests/test_fft1d_fpga.cpp b/tests/test_fft1d_fpga.cpp index 7a5db7a..0d991ae 100644 --- a/tests/test_fft1d_fpga.cpp +++ b/tests/test_fft1d_fpga.cpp @@ -7,6 +7,7 @@ extern "C" { #include "fftfpga/fftfpga.h" #include "helper.h" #include + #include #ifdef USE_FFTW #include @@ -51,7 +52,7 @@ TEST(fft1dFPGATest, CorrectnessSp){ // malloc data to input fftf_create_data(inp, N); - int isInit= fpga_initialize("Intel(R) FPGA", "emu_64_fft1d/fft1d.aocx", 0); + int isInit= fpga_initialize("Intel(R) FPGA", "emu_64_fft1d/fft1d.aocx", false); ASSERT_EQ(isInit, 0); fpga_t fft_time = fftfpgaf_c2c_1d(64, inp, out, 0, 1); diff --git a/tests/test_fft2d_fpga.cpp b/tests/test_fft2d_fpga.cpp index 5c75180..ae381a1 100644 --- a/tests/test_fft2d_fpga.cpp +++ b/tests/test_fft2d_fpga.cpp @@ -12,6 +12,7 @@ extern "C" { #include "fftfpga/fftfpga.h" #include "helper.h" #include "verify_fftw.h" + #include } /** @@ -46,7 +47,7 @@ TEST(fft2dFPGATest, 
CorrectnessBRAM){ const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft2d_bram/fft2d_bram.aocx", 0); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft2d_bram/fft2d_bram.aocx", false); EXPECT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N; diff --git a/tests/test_fft3d_fpga.cpp b/tests/test_fft3d_fpga.cpp index 63ecbc6..4cfedb4 100644 --- a/tests/test_fft3d_fpga.cpp +++ b/tests/test_fft3d_fpga.cpp @@ -12,6 +12,7 @@ extern "C" { #include "fftfpga/fftfpga.h" #include "helper.h" #include "verify_fftw.h" + #include } /** @@ -46,7 +47,7 @@ TEST(fft3dFPGATest, CorrectnessBRAM){ const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_bram/fft3d_bram.aocx", 0); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_bram/fft3d_bram.aocx", false); ASSERT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N * N; diff --git a/tests/test_fft_setup.cpp b/tests/test_fft_setup.cpp index 2f0ab91..a8da7a9 100644 --- a/tests/test_fft_setup.cpp +++ b/tests/test_fft_setup.cpp @@ -2,6 +2,7 @@ #include "gtest/gtest.h" // finds this because gtest is linked #include +#include #ifdef USE_FFTW #include #endif @@ -16,16 +17,16 @@ extern "C" { */ TEST(fftFPGASetupTest, ValidInit){ // empty path argument - EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "", 0), -1); + EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "", false), -1); // wrong platform name - EXPECT_EQ(fpga_initialize("TEST", "fft1d_emulate.aocx", 0), -2); + EXPECT_EQ(fpga_initialize("TEST", "fft1d_emulate.aocx", false), -2); // wrong path argument - EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "TEST", 0), -4); + EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "TEST", false), -4); // right path and platform names - EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_bram/fft3d_bram.aocx", 0), 0); + EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_bram/fft3d_bram.aocx", false), 0); 
fpga_final(); } From 724c74787887520f6c92d9ea037caecb6b635c27 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 9 Sep 2020 16:34:35 +0200 Subject: [PATCH 25/76] FFT3D: renamed bram transpose files, dependencies --- .gitignore | 3 +- .gitlab-ci.yml | 4 +- kernels/fft3d/CMakeLists.txt | 2 +- kernels/fft3d/fft3d_bram.cl | 674 +++++++++++++++++++------------ kernels/fft3d/fft3d_bram_opt.cl | 495 ----------------------- kernels/fft3d/fft3d_bram_triv.cl | 317 +++++++++++++++ tests/CMakeLists.txt | 2 +- 7 files changed, 749 insertions(+), 748 deletions(-) delete mode 100755 kernels/fft3d/fft3d_bram_opt.cl create mode 100755 kernels/fft3d/fft3d_bram_triv.cl diff --git a/.gitignore b/.gitignore index ddafe42..6284390 100755 --- a/.gitignore +++ b/.gitignore @@ -6,8 +6,9 @@ fpgabitstream/ reports/ vscode/ scripts/ -build_svm/ +build* svm_build/ +debug* tags *.DS_Store diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f293684..42ef972 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,7 +15,7 @@ build-all: - make fft1d_emu - make fft2d_ddr_emu - make fft2d_bram_emu - - make fft3d_ddr_triv_emu + - make fft3d_ddr_emu - make fft3d_bram_emu - chmod +x bin/fft3d bin/fft2d bin/fft1d bin/test_fftfpga artifacts: @@ -23,7 +23,7 @@ build-all: - build/bin/emu_64_fft1d/fft1d.aocx - build/bin/emu_64_fft2d_ddr/fft2d_bram.aocx - build/bin/emu_64_fft2d_bram/fft2d_ddr.aocx - - build/bin/emu_64_fft3d_ddr_triv/fft3d_ddr_triv.aocx + - build/bin/emu_64_fft3d_ddr/fft3d_ddr.aocx - build/bin/emu_64_fft3d_bram/fft3d_bram.aocx - build/bin/test_fftfpga - build/bin/fft3d diff --git a/kernels/fft3d/CMakeLists.txt b/kernels/fft3d/CMakeLists.txt index 69ba673..fc28162 100644 --- a/kernels/fft3d/CMakeLists.txt +++ b/kernels/fft3d/CMakeLists.txt @@ -9,7 +9,7 @@ cmake_minimum_required(VERSION 3.10) # - ${kernel_name}_syn: to generate synthesis binary ## set(CL_PATH "${fftkernelsfpga_SOURCE_DIR}/fft3d") -set(kernels fft3d_bram fft3d_bram_opt fft3d_ddr fft3d_ddr_triv) +set(kernels fft3d_bram 
fft3d_bram_triv fft3d_ddr fft3d_ddr_triv) include(${fftkernelsfpga_SOURCE_DIR}/cmake/genKernelTargets.cmake) diff --git a/kernels/fft3d/fft3d_bram.cl b/kernels/fft3d/fft3d_bram.cl index e23ee64..3decfa0 100755 --- a/kernels/fft3d/fft3d_bram.cl +++ b/kernels/fft3d/fft3d_bram.cl @@ -1,73 +1,64 @@ -// Author: Arjun Ramaswami +// Author: Arjun Ramaswami -#include "fft_8.cl" - -// Source the log(size) (log(1k) = 10) from a header shared with the host code #include "fft_config.h" +#include "fft_8.cl" +#include "../matrixTranspose/diagonal_bitrev.cl" #pragma OPENCL EXTENSION cl_intel_channels : enable -channel float2 chaninfft[8] __attribute__((depth(8))); -channel float2 chanoutfft[8] __attribute__((depth(8))); -channel float2 chaninfft2[8] __attribute__((depth(8))); -channel float2 chanoutfft2[8] __attribute__((depth(8))); -channel float2 chaninfetch[8] __attribute__((depth(8))); - - -int bit_reversed(int x, int bits) { - int y = 0; - #pragma unroll - for (int i = 0; i < bits; i++) { - y <<= 1; - y |= x & 1; - x >>= 1; - } - return y; -} -void sendTofft(float2 *buffer, unsigned j){ - write_channel_intel(chaninfft[0], buffer[j]); // 0 - write_channel_intel(chaninfft[1], buffer[4 * N / 8 + j]); // 32 - write_channel_intel(chaninfft[2], buffer[2 * N / 8 + j]); // 16 - write_channel_intel(chaninfft[3], buffer[6 * N / 8 + j]); // 48 - write_channel_intel(chaninfft[4], buffer[N / 8 + j]); // 8 - write_channel_intel(chaninfft[5], buffer[5 * N / 8 + j]); // 40 - write_channel_intel(chaninfft[6], buffer[3 * N / 8 + j]); // 24 - write_channel_intel(chaninfft[7], buffer[7 * N / 8 + j]); // 54 -} +channel float2 chaninfft3da[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft3db[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft3dc[POINTS] __attribute__((depth(POINTS))); -// Kernel that fetches data from global memory -kernel void fetch(global volatile float2 * restrict src) { +channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); 
+channel float2 chaninTranspose3D[POINTS] __attribute__((depth(POINTS))); - for(unsigned k = 0; k < (1 << (LOGN + LOGN)); k++){ +channel float2 chaninTranStore[POINTS] __attribute__((depth(POINTS))); - float2 buf[N]; - #pragma unroll 8 - for(unsigned i = 0; i < N; i++){ - buf[i & ((1<= delay) { + write_channel_intel(chaninfft3da[0], data.i0); + write_channel_intel(chaninfft3da[1], data.i1); + write_channel_intel(chaninfft3da[2], data.i2); + write_channel_intel(chaninfft3da[3], data.i3); + write_channel_intel(chaninfft3da[4], data.i4); + write_channel_intel(chaninfft3da[5], data.i5); + write_channel_intel(chaninfft3da[6], data.i6); + write_channel_intel(chaninfft3da[7], data.i7); } } } @@ -75,7 +66,6 @@ kernel void fetch(global volatile float2 * restrict src) { /* This single work-item task wraps the FFT engine * 'inverse' toggles between the direct and the inverse transform */ - kernel void fft3da(int inverse) { /* The FFT engine requires a sliding window for data reordering; data stored @@ -85,121 +75,297 @@ kernel void fft3da(int inverse) { */ float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for( int j = 0; j < N * 2; j++){ - - // needs to run "N / 8 - 1" additional iterations to drain the last outputs - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft[0]); - data.i1 = read_channel_intel(chaninfft[1]); - data.i2 = read_channel_intel(chaninfft[2]); - data.i3 = read_channel_intel(chaninfft[3]); - data.i4 = read_channel_intel(chaninfft[4]); - data.i5 = read_channel_intel(chaninfft[5]); - data.i6 = read_channel_intel(chaninfft[6]); - data.i7 = read_channel_intel(chaninfft[7]); - - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N 
/ POINTS - 1) { - write_channel_intel(chanoutfft[0], data.i0); - write_channel_intel(chanoutfft[1], data.i1); - write_channel_intel(chanoutfft[2], data.i2); - write_channel_intel(chanoutfft[3], data.i3); - write_channel_intel(chanoutfft[4], data.i4); - write_channel_intel(chanoutfft[5], data.i5); - write_channel_intel(chanoutfft[6], data.i6); - write_channel_intel(chanoutfft[7], data.i7); - } + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3da[0]); + data.i1 = read_channel_intel(chaninfft3da[1]); + data.i2 = read_channel_intel(chaninfft3da[2]); + data.i3 = read_channel_intel(chaninfft3da[3]); + data.i4 = read_channel_intel(chaninfft3da[4]); + data.i5 = read_channel_intel(chaninfft3da[5]); + data.i6 = read_channel_intel(chaninfft3da[6]); + data.i7 = read_channel_intel(chaninfft3da[7]); + } + else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; } - } -} -// Transposes fetched data; stores them to global memory -kernel void transpose(global float2 * restrict dest) { + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose[0], data.i0); + write_channel_intel(chaninTranspose[1], data.i1); + write_channel_intel(chaninTranspose[2], data.i2); + write_channel_intel(chaninTranspose[3], data.i3); + write_channel_intel(chaninTranspose[4], data.i4); + write_channel_intel(chaninTranspose[5], data.i5); + write_channel_intel(chaninTranspose[6], data.i6); + write_channel_intel(chaninTranspose[7], data.i7); + } + } + } +} - unsigned revcolt, where_read, where_write, where; +kernel void transpose2d() { + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; - local float2 buf[N * N]; + float2 buf[2][DEPTH][POINTS]; 
+ //float2 bitrev_in[2][N], bitrev_out[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + + int initial_delay = DELAY + DELAY; // for each of the bitrev buffer + + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose[0]); + data.i1 = read_channel_intel(chaninTranspose[1]); + data.i2 = read_channel_intel(chaninTranspose[2]); + data.i3 = read_channel_intel(chaninTranspose[3]); + data.i4 = read_channel_intel(chaninTranspose[4]); + data.i5 = read_channel_intel(chaninTranspose[5]); + data.i6 = read_channel_intel(chaninTranspose[6]); + data.i7 = read_channel_intel(chaninTranspose[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } - // Perform N times N*N transpositions and transfers - for(unsigned p = 0; p < N; p++){ + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, DELAY); + + data_out = readBuf( + is_bufA ? buf[1] : buf[0], + step); + + unsigned start_row = (step + DELAY) & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? 
bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + + if (step >= (DEPTH)) { + write_channel_intel(chaninfft3db[0], data_out.i0); + write_channel_intel(chaninfft3db[1], data_out.i1); + write_channel_intel(chaninfft3db[2], data_out.i2); + write_channel_intel(chaninfft3db[3], data_out.i3); + write_channel_intel(chaninfft3db[4], data_out.i4); + write_channel_intel(chaninfft3db[5], data_out.i5); + write_channel_intel(chaninfft3db[6], data_out.i6); + write_channel_intel(chaninfft3db[7], data_out.i7); + } + } +} - for(unsigned i = 0; i < N; i++){ - for(unsigned k = 0; k < (N / 8); k++){ - where_read = ((i << LOGN) + (k << LOGPOINTS)); +kernel void fft3db(int inverse) { - buf[where_read + 0] = read_channel_intel(chanoutfft[0]); - buf[where_read + 1] = read_channel_intel(chanoutfft[1]); - buf[where_read + 2] = read_channel_intel(chanoutfft[2]); - buf[where_read + 3] = read_channel_intel(chanoutfft[3]); - buf[where_read + 4] = read_channel_intel(chanoutfft[4]); - buf[where_read + 5] = read_channel_intel(chanoutfft[5]); - buf[where_read + 6] = read_channel_intel(chanoutfft[6]); - buf[where_read + 7] = read_channel_intel(chanoutfft[7]); - } - } + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for(unsigned k = 0; k < (N / 8); k++){ - where_write = ((k * N) + revcolt); + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3db[0]); + data.i1 = read_channel_intel(chaninfft3db[1]); + data.i2 = read_channel_intel(chaninfft3db[2]); + data.i3 = 
read_channel_intel(chaninfft3db[3]); + data.i4 = read_channel_intel(chaninfft3db[4]); + data.i5 = read_channel_intel(chaninfft3db[5]); + data.i6 = read_channel_intel(chaninfft3db[6]); + data.i7 = read_channel_intel(chaninfft3db[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } - write_channel_intel(chaninfft2[0], buf[where_write]); // 0 - write_channel_intel(chaninfft2[1], buf[where_write + 4 * (N / 8) * N]); // 32 - write_channel_intel(chaninfft2[2], buf[where_write + 2 * (N / 8) * N]); // 16 - write_channel_intel(chaninfft2[3], buf[where_write + 6 * (N / 8) * N]); // 48 - write_channel_intel(chaninfft2[4], buf[where_write + (N / 8) * N]); // 8 - write_channel_intel(chaninfft2[5], buf[where_write + 5 * (N / 8) * N]); // 40 - write_channel_intel(chaninfft2[6], buf[where_write + 3 * (N / 8) * N]); // 24 - write_channel_intel(chaninfft2[7], buf[where_write + 7 * (N / 8) * N]); // 54 + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose3D[0], data.i0); + write_channel_intel(chaninTranspose3D[1], data.i1); + write_channel_intel(chaninTranspose3D[2], data.i2); + write_channel_intel(chaninTranspose3D[3], data.i3); + write_channel_intel(chaninTranspose3D[4], data.i4); + write_channel_intel(chaninTranspose3D[5], data.i5); + write_channel_intel(chaninTranspose3D[6], data.i6); + write_channel_intel(chaninTranspose3D[7], data.i7); } } } +} - for(unsigned p = 0; p < N; p++){ - - for(unsigned i = 0; i < N; i++){ - for(unsigned j = 0; j < (N / 8); j++){ - where = ((i << LOGN) + (j << LOGPOINTS)); - - buf[where + 0] = read_channel_intel(chanoutfft[0]); - buf[where + 1] = read_channel_intel(chanoutfft[1]); - buf[where + 2] = read_channel_intel(chanoutfft[2]); - buf[where + 3] = read_channel_intel(chanoutfft[3]); - buf[where + 4] = read_channel_intel(chanoutfft[4]); - buf[where + 5] = read_channel_intel(chanoutfft[5]); - buf[where + 
6] = read_channel_intel(chanoutfft[6]); - buf[where + 7] = read_channel_intel(chanoutfft[7]); - } - } +kernel void transpose3D() { - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); - where = ( (i << (LOGN + LOGN)) + (p << LOGN)); + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; - #pragma unroll 8 - for( unsigned q = 0; q < N; q++){ - dest[where + q] = buf[(q << LOGN) + revcolt]; - } + float2 buf[2][DEPTH][POINTS]; + local float2 buf3D[N * N * N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose3D[0]); + data.i1 = read_channel_intel(chaninTranspose3D[1]); + data.i2 = read_channel_intel(chaninTranspose3D[2]); + data.i3 = read_channel_intel(chaninTranspose3D[3]); + data.i4 = read_channel_intel(chaninTranspose3D[4]); + data.i5 = read_channel_intel(chaninTranspose3D[5]); + data.i6 = read_channel_intel(chaninTranspose3D[6]); + data.i7 = read_channel_intel(chaninTranspose3D[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? 
buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned index = (step - DEPTH) * 8; + + buf3D[index + 0] = data_out.i0; + buf3D[index + 1] = data_out.i1; + buf3D[index + 2] = data_out.i2; + buf3D[index + 3] = data_out.i3; + buf3D[index + 4] = data_out.i4; + buf3D[index + 5] = data_out.i5; + buf3D[index + 6] = data_out.i6; + buf3D[index + 7] = data_out.i7; } - } + is_bufA = false; + is_bitrevA = false; + + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + + // additional iterations to fill the buffers + for(unsigned step = 0; step < (N * DEPTH) + DEPTH + DELAY; step++){ + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned start_index = step + DELAY; + unsigned zdim = (step >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (step >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // increment by 8 until N / 8 + unsigned xdim = (step * 8) & (N - 1); + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (step >> (LOGN + LOGN + LOGN - LOGPOINTS)); + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + float2x8 data, data_out; + if (step < (N * DEPTH)) { + data.i0 = buf3D[index + 0]; + data.i1 = buf3D[index + 1]; + data.i2 = buf3D[index + 2]; + data.i3 = buf3D[index + 3]; + data.i4 = buf3D[index + 4]; + data.i5 = buf3D[index + 5]; + data.i6 = buf3D[index + 6]; + data.i7 = buf3D[index + 7]; + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_fetch( + is_bufA ? buf[1] : buf[0], + step, 0); + + unsigned start_row = step & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? 
bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + if (step >= (DEPTH + DELAY)) { + + write_channel_intel(chaninfft3dc[0], data_out.i0); + write_channel_intel(chaninfft3dc[1], data_out.i1); + write_channel_intel(chaninfft3dc[2], data_out.i2); + write_channel_intel(chaninfft3dc[3], data_out.i3); + write_channel_intel(chaninfft3dc[4], data_out.i4); + write_channel_intel(chaninfft3dc[5], data_out.i5); + write_channel_intel(chaninfft3dc[6], data_out.i6); + write_channel_intel(chaninfft3dc[7], data_out.i7); + } + } } -kernel void fft3db(int inverse) { +/* + * Input and output data in bit-reversed format + */ +kernel void fft3dc(int inverse) { /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -208,110 +374,122 @@ kernel void fft3db(int inverse) { */ float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for( int j = 0; j < N; j++){ - - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft2[0]); - data.i1 = read_channel_intel(chaninfft2[1]); - data.i2 = read_channel_intel(chaninfft2[2]); - data.i3 = read_channel_intel(chaninfft2[3]); - data.i4 = read_channel_intel(chaninfft2[4]); - data.i5 = read_channel_intel(chaninfft2[5]); - data.i6 = read_channel_intel(chaninfft2[6]); - data.i7 = read_channel_intel(chaninfft2[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chanoutfft2[0], data.i0); - write_channel_intel(chanoutfft2[1], data.i1); - write_channel_intel(chanoutfft2[2], data.i2); - write_channel_intel(chanoutfft2[3], data.i3); - write_channel_intel(chanoutfft2[4], data.i4); - 
write_channel_intel(chanoutfft2[5], data.i5); - write_channel_intel(chanoutfft2[6], data.i6); - write_channel_intel(chanoutfft2[7], data.i7); - } - } - } -} - -// Stores data for 3rd dim FFT -kernel void transpose3D(){ - unsigned revcolt, where; - unsigned where_test; - - local float2 buf_3d[N * N * N]; - local float2 buf[N * N]; - - // perform N*N*N writes to buffer - for(unsigned m = 0; m < N; m++){ - - for(unsigned i = 0; i < N; i++){ - for(unsigned j = 0; j < (N / 8); j++){ - where = ((i << LOGN) + (j << LOGPOINTS)); - - buf[where + 0] = read_channel_intel(chanoutfft2[0]); - buf[where + 1] = read_channel_intel(chanoutfft2[1]); - buf[where + 2] = read_channel_intel(chanoutfft2[2]); - buf[where + 3] = read_channel_intel(chanoutfft2[3]); - buf[where + 4] = read_channel_intel(chanoutfft2[4]); - buf[where + 5] = read_channel_intel(chanoutfft2[5]); - buf[where + 6] = read_channel_intel(chanoutfft2[6]); - buf[where + 7] = read_channel_intel(chanoutfft2[7]); + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3dc[0]); + data.i1 = read_channel_intel(chaninfft3dc[1]); + data.i2 = read_channel_intel(chaninfft3dc[2]); + data.i3 = read_channel_intel(chaninfft3dc[3]); + data.i4 = read_channel_intel(chaninfft3dc[4]); + data.i5 = read_channel_intel(chaninfft3dc[5]); + data.i6 = read_channel_intel(chaninfft3dc[6]); + data.i7 = read_channel_intel(chaninfft3dc[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; } - } - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); - where = (i << LOGN) + (m << (LOGN + LOGN)); - - #pragma unroll 8 - for( unsigned u = 0; u < N; u++){ - buf_3d[where + u] = buf[(u << LOGN) + revcolt]; + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to 
channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranStore[0], data.i0); + write_channel_intel(chaninTranStore[1], data.i1); + write_channel_intel(chaninTranStore[2], data.i2); + write_channel_intel(chaninTranStore[3], data.i3); + write_channel_intel(chaninTranStore[4], data.i4); + write_channel_intel(chaninTranStore[5], data.i5); + write_channel_intel(chaninTranStore[6], data.i6); + write_channel_intel(chaninTranStore[7], data.i7); } } - } +} - // Flush entire 3d buffer transposed through channels - for(unsigned m = 0; m < N; m++){ - - for(unsigned i = 0; i < N; i++){ - where = ((i << (LOGN + LOGN)) + ( m << LOGN)); +kernel void store(global float2 * restrict dest) { - #pragma unroll 8 - for(unsigned u = 0; u < N; u++){ - buf[(i << LOGN) + u] = buf_3d[where + u]; - } - } + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; - for( unsigned i = 0; i < N; i++){ - for( unsigned j = 0; j < (N / 8); j++){ - where = (j * N * 8) + i; + float2 buf[2][DEPTH][POINTS]; + float2 bitrev_in[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; - write_channel_intel(chaninfetch[0], buf[where + (0 << LOGN)]); - write_channel_intel(chaninfetch[1], buf[where + (1 << LOGN)]); - write_channel_intel(chaninfetch[2], buf[where + (2 << LOGN)]); - write_channel_intel(chaninfetch[3], buf[where + (3 << LOGN)]); - write_channel_intel(chaninfetch[4], buf[where + (4 << LOGN)]); - write_channel_intel(chaninfetch[5], buf[where + (5 << LOGN)]); - write_channel_intel(chaninfetch[6], buf[where + (6 << LOGN)]); - write_channel_intel(chaninfetch[7], buf[where + (7 << LOGN)]); - } + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranStore[0]); + data.i1 = 
read_channel_intel(chaninTranStore[1]); + data.i2 = read_channel_intel(chaninTranStore[2]); + data.i3 = read_channel_intel(chaninTranStore[3]); + data.i4 = read_channel_intel(chaninTranStore[4]); + data.i5 = read_channel_intel(chaninTranStore[5]); + data.i6 = read_channel_intel(chaninTranStore[6]); + data.i7 = read_channel_intel(chaninTranStore[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? 
buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned start_index = (step - DEPTH); + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned zdim = (start_index >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (start_index >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // incremenet by 8 until N / 8 + unsigned xdim = (start_index * 8) & ( N - 1); + //unsigned index = (step - DEPTH) * 8; + + // increment by N*N*N + unsigned cube = LOGN + LOGN + LOGN - LOGPOINTS; + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (start_index >> cube); + //unsigned batch_index = 0; + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; } - } - } \ No newline at end of file diff --git a/kernels/fft3d/fft3d_bram_opt.cl b/kernels/fft3d/fft3d_bram_opt.cl deleted file mode 100755 index 3decfa0..0000000 --- a/kernels/fft3d/fft3d_bram_opt.cl +++ /dev/null @@ -1,495 +0,0 @@ -// Author: Arjun Ramaswami - -#include "fft_config.h" -#include "fft_8.cl" -#include "../matrixTranspose/diagonal_bitrev.cl" - -#pragma OPENCL EXTENSION cl_intel_channels : enable - -channel float2 chaninfft3da[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninfft3db[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninfft3dc[POINTS] __attribute__((depth(POINTS))); - -channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninTranspose3D[POINTS] __attribute__((depth(POINTS))); - -channel float2 chaninTranStore[POINTS] __attribute__((depth(POINTS))); - -// Kernel that fetches data from global memory -kernel void fetch(global volatile float2 * restrict src) { - unsigned delay = (1 << (LOGN - 
LOGPOINTS)); // N / 8 - bool is_bitrevA = false; - - float2 __attribute__((memory, numbanks(8))) buf[2][N]; - - // additional iterations to fill the buffers - for(unsigned step = 0; step < (N * DEPTH) + delay; step++){ - - unsigned where = (step & ((N * DEPTH) - 1)) * 8; - - float2x8 data; - if (step < (N * DEPTH)) { - data.i0 = src[where + 0]; - data.i1 = src[where + 1]; - data.i2 = src[where + 2]; - data.i3 = src[where + 3]; - data.i4 = src[where + 4]; - data.i5 = src[where + 5]; - data.i6 = src[where + 6]; - data.i7 = src[where + 7]; - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_fetch(data, - is_bitrevA ? buf[0] : buf[1], - is_bitrevA ? buf[1] : buf[0], - row); - - if (step >= delay) { - write_channel_intel(chaninfft3da[0], data.i0); - write_channel_intel(chaninfft3da[1], data.i1); - write_channel_intel(chaninfft3da[2], data.i2); - write_channel_intel(chaninfft3da[3], data.i3); - write_channel_intel(chaninfft3da[4], data.i4); - write_channel_intel(chaninfft3da[5], data.i5); - write_channel_intel(chaninfft3da[6], data.i6); - write_channel_intel(chaninfft3da[7], data.i7); - } - } -} - -/* This single work-item task wraps the FFT engine - * 'inverse' toggles between the direct and the inverse transform - */ -kernel void fft3da(int inverse) { - - /* The FFT engine requires a sliding window for data reordering; data stored - * in this array is carried across loop iterations and shifted by 1 element - * every iteration; all loop dependencies derived from the uses of this - * array are simple transfers between adjacent array elements - */ - - float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - - #pragma loop_coalesce - for(unsigned j = 0; j < N; j++){ - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - if (i < N * (N / POINTS)) { - data.i0 = 
read_channel_intel(chaninfft3da[0]); - data.i1 = read_channel_intel(chaninfft3da[1]); - data.i2 = read_channel_intel(chaninfft3da[2]); - data.i3 = read_channel_intel(chaninfft3da[3]); - data.i4 = read_channel_intel(chaninfft3da[4]); - data.i5 = read_channel_intel(chaninfft3da[5]); - data.i6 = read_channel_intel(chaninfft3da[6]); - data.i7 = read_channel_intel(chaninfft3da[7]); - } - else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chaninTranspose[0], data.i0); - write_channel_intel(chaninTranspose[1], data.i1); - write_channel_intel(chaninTranspose[2], data.i2); - write_channel_intel(chaninTranspose[3], data.i3); - write_channel_intel(chaninTranspose[4], data.i4); - write_channel_intel(chaninTranspose[5], data.i5); - write_channel_intel(chaninTranspose[6], data.i6); - write_channel_intel(chaninTranspose[7], data.i7); - } - } - } -} - -kernel void transpose2d() { - const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 - bool is_bufA = false, is_bitrevA = false; - - float2 buf[2][DEPTH][POINTS]; - //float2 bitrev_in[2][N], bitrev_out[2][N]; - //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; - float2 bitrev_in[2][N]; - float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; - - int initial_delay = DELAY + DELAY; // for each of the bitrev buffer - - // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ - - float2x8 data, data_out; - if (step < ((N * DEPTH) - initial_delay)) { - data.i0 = read_channel_intel(chaninTranspose[0]); - data.i1 = read_channel_intel(chaninTranspose[1]); - data.i2 = read_channel_intel(chaninTranspose[2]); - data.i3 = read_channel_intel(chaninTranspose[3]); - data.i4 = read_channel_intel(chaninTranspose[4]); - data.i5 = 
read_channel_intel(chaninTranspose[5]); - data.i6 = read_channel_intel(chaninTranspose[6]); - data.i7 = read_channel_intel(chaninTranspose[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Swap buffers every N*N/8 iterations - // starting from the additional delay of N/8 iterations - is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_in(data, - is_bitrevA ? bitrev_in[0] : bitrev_in[1], - is_bitrevA ? bitrev_in[1] : bitrev_in[0], - row); - - writeBuf(data, - is_bufA ? buf[0] : buf[1], - step, DELAY); - - data_out = readBuf( - is_bufA ? buf[1] : buf[0], - step); - - unsigned start_row = (step + DELAY) & (DEPTH -1); - data_out = bitreverse_out( - is_bitrevA ? bitrev_out[0] : bitrev_out[1], - is_bitrevA ? bitrev_out[1] : bitrev_out[0], - data_out, start_row); - - - if (step >= (DEPTH)) { - write_channel_intel(chaninfft3db[0], data_out.i0); - write_channel_intel(chaninfft3db[1], data_out.i1); - write_channel_intel(chaninfft3db[2], data_out.i2); - write_channel_intel(chaninfft3db[3], data_out.i3); - write_channel_intel(chaninfft3db[4], data_out.i4); - write_channel_intel(chaninfft3db[5], data_out.i5); - write_channel_intel(chaninfft3db[6], data_out.i6); - write_channel_intel(chaninfft3db[7], data_out.i7); - } - } -} - -kernel void fft3db(int inverse) { - - /* The FFT engine requires a sliding window for data reordering; data stored - * in this array is carried across loop iterations and shifted by 1 element - * every iteration; all loop dependencies derived from the uses of this - * array are simple transfers between adjacent array elements - */ - - float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - - #pragma loop_coalesce - for(unsigned j = 0; j < N; j++){ - for (unsigned i = 0; i < N * (N / POINTS) + N / 
POINTS - 1; i++) { - float2x8 data; - - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft3db[0]); - data.i1 = read_channel_intel(chaninfft3db[1]); - data.i2 = read_channel_intel(chaninfft3db[2]); - data.i3 = read_channel_intel(chaninfft3db[3]); - data.i4 = read_channel_intel(chaninfft3db[4]); - data.i5 = read_channel_intel(chaninfft3db[5]); - data.i6 = read_channel_intel(chaninfft3db[6]); - data.i7 = read_channel_intel(chaninfft3db[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - if (i >= N / POINTS - 1) { - write_channel_intel(chaninTranspose3D[0], data.i0); - write_channel_intel(chaninTranspose3D[1], data.i1); - write_channel_intel(chaninTranspose3D[2], data.i2); - write_channel_intel(chaninTranspose3D[3], data.i3); - write_channel_intel(chaninTranspose3D[4], data.i4); - write_channel_intel(chaninTranspose3D[5], data.i5); - write_channel_intel(chaninTranspose3D[6], data.i6); - write_channel_intel(chaninTranspose3D[7], data.i7); - } - } - } -} - -kernel void transpose3D() { - - const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 - bool is_bufA = false, is_bitrevA = false; - - float2 buf[2][DEPTH][POINTS]; - local float2 buf3D[N * N * N]; - //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; - float2 bitrev_in[2][N]; - - int initial_delay = DELAY; // for each of the bitrev buffer - // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ - - float2x8 data, data_out; - if (step < ((N * DEPTH) - initial_delay)) { - data.i0 = read_channel_intel(chaninTranspose3D[0]); - data.i1 = read_channel_intel(chaninTranspose3D[1]); - data.i2 = read_channel_intel(chaninTranspose3D[2]); - data.i3 = read_channel_intel(chaninTranspose3D[3]); - data.i4 = read_channel_intel(chaninTranspose3D[4]); - data.i5 = read_channel_intel(chaninTranspose3D[5]); - 
data.i6 = read_channel_intel(chaninTranspose3D[6]); - data.i7 = read_channel_intel(chaninTranspose3D[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - // Swap buffers every N*N/8 iterations - // starting from the additional delay of N/8 iterations - is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_in(data, - is_bitrevA ? bitrev_in[0] : bitrev_in[1], - is_bitrevA ? bitrev_in[1] : bitrev_in[0], - row); - - writeBuf(data, - is_bufA ? buf[0] : buf[1], - step, 0); - - data_out = readBuf_store( - is_bufA ? buf[1] : buf[0], - step); - - if (step >= (DEPTH)) { - unsigned index = (step - DEPTH) * 8; - - buf3D[index + 0] = data_out.i0; - buf3D[index + 1] = data_out.i1; - buf3D[index + 2] = data_out.i2; - buf3D[index + 3] = data_out.i3; - buf3D[index + 4] = data_out.i4; - buf3D[index + 5] = data_out.i5; - buf3D[index + 6] = data_out.i6; - buf3D[index + 7] = data_out.i7; - } - } - - is_bufA = false; - is_bitrevA = false; - - float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; - - // additional iterations to fill the buffers - for(unsigned step = 0; step < (N * DEPTH) + DEPTH + DELAY; step++){ - // increment z by 1 every N/8 steps until (N*N/ 8) - unsigned start_index = step + DELAY; - unsigned zdim = (step >> (LOGN - LOGPOINTS)) & (N - 1); - - // increment y by 1 every N*N/8 points until N - unsigned ydim = (step >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); - - // increment by 8 until N / 8 - unsigned xdim = (step * 8) & (N - 1); - - // increment by 1 every N*N*N / 8 steps - unsigned batch_index = (step >> (LOGN + LOGN + LOGN - LOGPOINTS)); - - unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; - - float2x8 data, data_out; - if (step < (N * DEPTH)) { - data.i0 = buf3D[index + 0]; - data.i1 = 
buf3D[index + 1]; - data.i2 = buf3D[index + 2]; - data.i3 = buf3D[index + 3]; - data.i4 = buf3D[index + 4]; - data.i5 = buf3D[index + 5]; - data.i6 = buf3D[index + 6]; - data.i7 = buf3D[index + 7]; - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - writeBuf(data, - is_bufA ? buf[0] : buf[1], - step, 0); - - data_out = readBuf_fetch( - is_bufA ? buf[1] : buf[0], - step, 0); - - unsigned start_row = step & (DEPTH -1); - data_out = bitreverse_out( - is_bitrevA ? bitrev_out[0] : bitrev_out[1], - is_bitrevA ? bitrev_out[1] : bitrev_out[0], - data_out, start_row); - - if (step >= (DEPTH + DELAY)) { - - write_channel_intel(chaninfft3dc[0], data_out.i0); - write_channel_intel(chaninfft3dc[1], data_out.i1); - write_channel_intel(chaninfft3dc[2], data_out.i2); - write_channel_intel(chaninfft3dc[3], data_out.i3); - write_channel_intel(chaninfft3dc[4], data_out.i4); - write_channel_intel(chaninfft3dc[5], data_out.i5); - write_channel_intel(chaninfft3dc[6], data_out.i6); - write_channel_intel(chaninfft3dc[7], data_out.i7); - } - } -} - -/* - * Input and output data in bit-reversed format - */ -kernel void fft3dc(int inverse) { - - /* The FFT engine requires a sliding window for data reordering; data stored - * in this array is carried across loop iterations and shifted by 1 element - * every iteration; all loop dependencies derived from the uses of this - * array are simple transfers between adjacent array elements - */ - - float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - - #pragma loop_coalesce - for(unsigned j = 0; j < N; j++){ - - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft3dc[0]); - data.i1 = 
read_channel_intel(chaninfft3dc[1]); - data.i2 = read_channel_intel(chaninfft3dc[2]); - data.i3 = read_channel_intel(chaninfft3dc[3]); - data.i4 = read_channel_intel(chaninfft3dc[4]); - data.i5 = read_channel_intel(chaninfft3dc[5]); - data.i6 = read_channel_intel(chaninfft3dc[6]); - data.i7 = read_channel_intel(chaninfft3dc[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chaninTranStore[0], data.i0); - write_channel_intel(chaninTranStore[1], data.i1); - write_channel_intel(chaninTranStore[2], data.i2); - write_channel_intel(chaninTranStore[3], data.i3); - write_channel_intel(chaninTranStore[4], data.i4); - write_channel_intel(chaninTranStore[5], data.i5); - write_channel_intel(chaninTranStore[6], data.i6); - write_channel_intel(chaninTranStore[7], data.i7); - } - } - } -} - -kernel void store(global float2 * restrict dest) { - - const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 - bool is_bufA = false, is_bitrevA = false; - - float2 buf[2][DEPTH][POINTS]; - float2 bitrev_in[2][N]; - //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; - - int initial_delay = DELAY; // for each of the bitrev buffer - // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ - - float2x8 data, data_out; - if (step < ((N * DEPTH) - initial_delay)) { - data.i0 = read_channel_intel(chaninTranStore[0]); - data.i1 = read_channel_intel(chaninTranStore[1]); - data.i2 = read_channel_intel(chaninTranStore[2]); - data.i3 = read_channel_intel(chaninTranStore[3]); - data.i4 = read_channel_intel(chaninTranStore[4]); - data.i5 = read_channel_intel(chaninTranStore[5]); - data.i6 = read_channel_intel(chaninTranStore[6]); - data.i7 = read_channel_intel(chaninTranStore[7]); - } else { - 
data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - // Swap buffers every N*N/8 iterations - // starting from the additional delay of N/8 iterations - is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_in(data, - is_bitrevA ? bitrev_in[0] : bitrev_in[1], - is_bitrevA ? bitrev_in[1] : bitrev_in[0], - row); - - writeBuf(data, - is_bufA ? buf[0] : buf[1], - step, 0); - - data_out = readBuf_store( - is_bufA ? buf[1] : buf[0], - step); - - if (step >= (DEPTH)) { - unsigned start_index = (step - DEPTH); - // increment z by 1 every N/8 steps until (N*N/ 8) - unsigned zdim = (start_index >> (LOGN - LOGPOINTS)) & (N - 1); - - // increment y by 1 every N*N/8 points until N - unsigned ydim = (start_index >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); - - // incremenet by 8 until N / 8 - unsigned xdim = (start_index * 8) & ( N - 1); - //unsigned index = (step - DEPTH) * 8; - - // increment by N*N*N - unsigned cube = LOGN + LOGN + LOGN - LOGPOINTS; - - // increment by 1 every N*N*N / 8 steps - unsigned batch_index = (start_index >> cube); - //unsigned batch_index = 0; - - unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; - - dest[index + 0] = data_out.i0; - dest[index + 1] = data_out.i1; - dest[index + 2] = data_out.i2; - dest[index + 3] = data_out.i3; - dest[index + 4] = data_out.i4; - dest[index + 5] = data_out.i5; - dest[index + 6] = data_out.i6; - dest[index + 7] = data_out.i7; - } - } -} \ No newline at end of file diff --git a/kernels/fft3d/fft3d_bram_triv.cl b/kernels/fft3d/fft3d_bram_triv.cl new file mode 100755 index 0000000..e23ee64 --- /dev/null +++ b/kernels/fft3d/fft3d_bram_triv.cl @@ -0,0 +1,317 @@ +// Author: Arjun Ramaswami + +#include "fft_8.cl" + +// Source the log(size) (log(1k) = 10) from a header shared with 
the host code +#include "fft_config.h" + +#pragma OPENCL EXTENSION cl_intel_channels : enable +channel float2 chaninfft[8] __attribute__((depth(8))); +channel float2 chanoutfft[8] __attribute__((depth(8))); +channel float2 chaninfft2[8] __attribute__((depth(8))); +channel float2 chanoutfft2[8] __attribute__((depth(8))); +channel float2 chaninfetch[8] __attribute__((depth(8))); + + +int bit_reversed(int x, int bits) { + int y = 0; + #pragma unroll + for (int i = 0; i < bits; i++) { + y <<= 1; + y |= x & 1; + x >>= 1; + } + return y; +} + +void sendTofft(float2 *buffer, unsigned j){ + write_channel_intel(chaninfft[0], buffer[j]); // 0 + write_channel_intel(chaninfft[1], buffer[4 * N / 8 + j]); // 32 + write_channel_intel(chaninfft[2], buffer[2 * N / 8 + j]); // 16 + write_channel_intel(chaninfft[3], buffer[6 * N / 8 + j]); // 48 + write_channel_intel(chaninfft[4], buffer[N / 8 + j]); // 8 + write_channel_intel(chaninfft[5], buffer[5 * N / 8 + j]); // 40 + write_channel_intel(chaninfft[6], buffer[3 * N / 8 + j]); // 24 + write_channel_intel(chaninfft[7], buffer[7 * N / 8 + j]); // 54 +} + +// Kernel that fetches data from global memory +kernel void fetch(global volatile float2 * restrict src) { + + for(unsigned k = 0; k < (1 << (LOGN + LOGN)); k++){ + + float2 buf[N]; + #pragma unroll 8 + for(unsigned i = 0; i < N; i++){ + buf[i & ((1<= N / POINTS - 1) { + write_channel_intel(chanoutfft[0], data.i0); + write_channel_intel(chanoutfft[1], data.i1); + write_channel_intel(chanoutfft[2], data.i2); + write_channel_intel(chanoutfft[3], data.i3); + write_channel_intel(chanoutfft[4], data.i4); + write_channel_intel(chanoutfft[5], data.i5); + write_channel_intel(chanoutfft[6], data.i6); + write_channel_intel(chanoutfft[7], data.i7); + } + } + } +} + +// Transposes fetched data; stores them to global memory +kernel void transpose(global float2 * restrict dest) { + + unsigned revcolt, where_read, where_write, where; + + local float2 buf[N * N]; + + // Perform N times N*N 
transpositions and transfers + for(unsigned p = 0; p < N; p++){ + + for(unsigned i = 0; i < N; i++){ + for(unsigned k = 0; k < (N / 8); k++){ + where_read = ((i << LOGN) + (k << LOGPOINTS)); + + buf[where_read + 0] = read_channel_intel(chanoutfft[0]); + buf[where_read + 1] = read_channel_intel(chanoutfft[1]); + buf[where_read + 2] = read_channel_intel(chanoutfft[2]); + buf[where_read + 3] = read_channel_intel(chanoutfft[3]); + buf[where_read + 4] = read_channel_intel(chanoutfft[4]); + buf[where_read + 5] = read_channel_intel(chanoutfft[5]); + buf[where_read + 6] = read_channel_intel(chanoutfft[6]); + buf[where_read + 7] = read_channel_intel(chanoutfft[7]); + } + } + + for(unsigned i = 0; i < N; i++){ + revcolt = bit_reversed(i, LOGN); + + for(unsigned k = 0; k < (N / 8); k++){ + where_write = ((k * N) + revcolt); + + write_channel_intel(chaninfft2[0], buf[where_write]); // 0 + write_channel_intel(chaninfft2[1], buf[where_write + 4 * (N / 8) * N]); // 32 + write_channel_intel(chaninfft2[2], buf[where_write + 2 * (N / 8) * N]); // 16 + write_channel_intel(chaninfft2[3], buf[where_write + 6 * (N / 8) * N]); // 48 + write_channel_intel(chaninfft2[4], buf[where_write + (N / 8) * N]); // 8 + write_channel_intel(chaninfft2[5], buf[where_write + 5 * (N / 8) * N]); // 40 + write_channel_intel(chaninfft2[6], buf[where_write + 3 * (N / 8) * N]); // 24 + write_channel_intel(chaninfft2[7], buf[where_write + 7 * (N / 8) * N]); // 54 + } + } + } + + for(unsigned p = 0; p < N; p++){ + + for(unsigned i = 0; i < N; i++){ + for(unsigned j = 0; j < (N / 8); j++){ + where = ((i << LOGN) + (j << LOGPOINTS)); + + buf[where + 0] = read_channel_intel(chanoutfft[0]); + buf[where + 1] = read_channel_intel(chanoutfft[1]); + buf[where + 2] = read_channel_intel(chanoutfft[2]); + buf[where + 3] = read_channel_intel(chanoutfft[3]); + buf[where + 4] = read_channel_intel(chanoutfft[4]); + buf[where + 5] = read_channel_intel(chanoutfft[5]); + buf[where + 6] = read_channel_intel(chanoutfft[6]); + 
buf[where + 7] = read_channel_intel(chanoutfft[7]); + } + } + + for(unsigned i = 0; i < N; i++){ + revcolt = bit_reversed(i, LOGN); + where = ( (i << (LOGN + LOGN)) + (p << LOGN)); + + #pragma unroll 8 + for( unsigned q = 0; q < N; q++){ + dest[where + q] = buf[(q << LOGN) + revcolt]; + } + } + + } + +} + +kernel void fft3db(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + for( int j = 0; j < N; j++){ + + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + // Read data from channels + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft2[0]); + data.i1 = read_channel_intel(chaninfft2[1]); + data.i2 = read_channel_intel(chaninfft2[2]); + data.i3 = read_channel_intel(chaninfft2[3]); + data.i4 = read_channel_intel(chaninfft2[4]); + data.i5 = read_channel_intel(chaninfft2[5]); + data.i6 = read_channel_intel(chaninfft2[6]); + data.i7 = read_channel_intel(chaninfft2[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chanoutfft2[0], data.i0); + write_channel_intel(chanoutfft2[1], data.i1); + write_channel_intel(chanoutfft2[2], data.i2); + write_channel_intel(chanoutfft2[3], data.i3); + write_channel_intel(chanoutfft2[4], data.i4); + write_channel_intel(chanoutfft2[5], data.i5); + write_channel_intel(chanoutfft2[6], data.i6); + write_channel_intel(chanoutfft2[7], data.i7); + } + } + + } +} + +// Stores data for 3rd dim FFT +kernel void transpose3D(){ + unsigned 
revcolt, where; + unsigned where_test; + + local float2 buf_3d[N * N * N]; + local float2 buf[N * N]; + + // perform N*N*N writes to buffer + for(unsigned m = 0; m < N; m++){ + + for(unsigned i = 0; i < N; i++){ + for(unsigned j = 0; j < (N / 8); j++){ + where = ((i << LOGN) + (j << LOGPOINTS)); + + buf[where + 0] = read_channel_intel(chanoutfft2[0]); + buf[where + 1] = read_channel_intel(chanoutfft2[1]); + buf[where + 2] = read_channel_intel(chanoutfft2[2]); + buf[where + 3] = read_channel_intel(chanoutfft2[3]); + buf[where + 4] = read_channel_intel(chanoutfft2[4]); + buf[where + 5] = read_channel_intel(chanoutfft2[5]); + buf[where + 6] = read_channel_intel(chanoutfft2[6]); + buf[where + 7] = read_channel_intel(chanoutfft2[7]); + } + } + + for(unsigned i = 0; i < N; i++){ + revcolt = bit_reversed(i, LOGN); + where = (i << LOGN) + (m << (LOGN + LOGN)); + + #pragma unroll 8 + for( unsigned u = 0; u < N; u++){ + buf_3d[where + u] = buf[(u << LOGN) + revcolt]; + } + } + + } + + // Flush entire 3d buffer transposed through channels + for(unsigned m = 0; m < N; m++){ + + for(unsigned i = 0; i < N; i++){ + where = ((i << (LOGN + LOGN)) + ( m << LOGN)); + + #pragma unroll 8 + for(unsigned u = 0; u < N; u++){ + buf[(i << LOGN) + u] = buf_3d[where + u]; + } + } + + for( unsigned i = 0; i < N; i++){ + for( unsigned j = 0; j < (N / 8); j++){ + where = (j * N * 8) + i; + + write_channel_intel(chaninfetch[0], buf[where + (0 << LOGN)]); + write_channel_intel(chaninfetch[1], buf[where + (1 << LOGN)]); + write_channel_intel(chaninfetch[2], buf[where + (2 << LOGN)]); + write_channel_intel(chaninfetch[3], buf[where + (3 << LOGN)]); + write_channel_intel(chaninfetch[4], buf[where + (4 << LOGN)]); + write_channel_intel(chaninfetch[5], buf[where + (5 << LOGN)]); + write_channel_intel(chaninfetch[6], buf[where + (6 << LOGN)]); + write_channel_intel(chaninfetch[7], buf[where + (7 << LOGN)]); + } + } + + } + +} \ No newline at end of file diff --git a/tests/CMakeLists.txt 
b/tests/CMakeLists.txt index f45ca32..816623d 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -36,7 +36,7 @@ else() endif() add_dependencies(test_fftfpga fft3d_bram_emu) -add_dependencies(test_fftfpga fft3d_ddr_triv_emu) +add_dependencies(test_fftfpga fft3d_ddr_emu) add_dependencies(test_fftfpga fft2d_bram_emu) add_dependencies(test_fftfpga fft2d_ddr_emu) add_dependencies(test_fftfpga fft1d_emu) From 82880686c305f061546cddc2370c412d96cf0bb4 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 9 Sep 2020 16:42:18 +0200 Subject: [PATCH 26/76] CI: fixed host executable names --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 42ef972..76ec8a0 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,7 +17,7 @@ build-all: - make fft2d_bram_emu - make fft3d_ddr_emu - make fft3d_bram_emu - - chmod +x bin/fft3d bin/fft2d bin/fft1d bin/test_fftfpga + - chmod +x bin/fft3d_bram bin/fft3d_ddr bin/fft2d bin/fft1d bin/test_fftfpga artifacts: paths: - build/bin/emu_64_fft1d/fft1d.aocx From 19c28fe5732c9df6d81cb6764fba0b1885271c01 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 10 Sep 2020 17:15:49 +0200 Subject: [PATCH 27/76] FFT1d: working svm --- api/src/fftfpga.c | 10 +++++----- examples/fft3d_ddr.c | 4 ++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index f763a23..3979325 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -395,7 +395,7 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int batch){ fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; - int num_pts = N * N * N; + int num_pts = N * batch; // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ @@ -403,7 +403,7 @@ fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch) } #ifdef VERBOSE - 
printf("Launching%s 3d FFT transform in DDR \n", inv ? " inverse":""); + printf("Launching%s 1D FFT transform in DDR \n", inv ? " inverse":""); #endif // Can't pass bool to device, so convert it to int @@ -1080,17 +1080,17 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { checkError(status, "Failed to set fftb kernel arg"); // kernel stores to DDR memory - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_inOutData); + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); checkError(status, "Failed to set store1 kernel arg"); // kernel fetches from DDR memory - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_inOutData); + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); checkError(status, "Failed to set fetch2 kernel arg"); status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftc kernel arg"); // kernel stores using SVM based PCIe to host - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData); + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void*)h_outData); checkError(status, "Failed to set store2 kernel arg"); fft_time.exec_t = getTimeinMilliSec(); diff --git a/examples/fft3d_ddr.c b/examples/fft3d_ddr.c index a69fa8e..59d8ae1 100755 --- a/examples/fft3d_ddr.c +++ b/examples/fft3d_ddr.c @@ -30,6 +30,7 @@ int main(int argc, const char **argv) { fpga_t timing = {0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; + double data_timer = 0.0; struct argparse_option options[] = { OPT_HELP(), @@ -74,13 +75,16 @@ int main(int argc, const char **argv) { for(size_t i = 0; i < iter; i++){ // create and destroy data every iteration + data_timer = getTimeinMilliseconds(); status = fftf_create_data(inp, N * N * N); + data_timer = getTimeinMilliseconds() - data_timer; if(!status){ fprintf(stderr, "Error in Data 
Creation \n"); free(inp); free(out); return EXIT_FAILURE; } + printf("Time to Create Data: %lfsec for %uMB\n", data_timer * 1e-3, (N*N*N* 8 / (1024 * 1024))); // use ddr for 3d Transpose temp_timer = getTimeinMilliseconds(); From ce228020834efe3eb3ea1f1737fd62d2204334de Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 1 Oct 2020 13:16:33 +0200 Subject: [PATCH 28/76] CI: corrected build, artifact paths --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 76ec8a0..52d1ec3 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -14,15 +14,15 @@ build-all: - make - make fft1d_emu - make fft2d_ddr_emu - - make fft2d_bram_emu + - make fft2d_bram_opt_emu - make fft3d_ddr_emu - make fft3d_bram_emu - chmod +x bin/fft3d_bram bin/fft3d_ddr bin/fft2d bin/fft1d bin/test_fftfpga artifacts: paths: - build/bin/emu_64_fft1d/fft1d.aocx - - build/bin/emu_64_fft2d_ddr/fft2d_bram.aocx - - build/bin/emu_64_fft2d_bram/fft2d_ddr.aocx + - build/bin/emu_64_fft2d_ddr/fft2d_ddr.aocx + - build/bin/emu_64_fft2d_bram_opt/fft2d_bram_opt.aocx - build/bin/emu_64_fft3d_ddr/fft3d_ddr.aocx - build/bin/emu_64_fft3d_bram/fft3d_bram.aocx - build/bin/test_fftfpga From 047d23bd86bd57ec086feb151f382203e108785b Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 1 Oct 2020 13:41:04 +0200 Subject: [PATCH 29/76] Test: fix path to 2d bram --- tests/test_fft2d_fpga.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_fft2d_fpga.cpp b/tests/test_fft2d_fpga.cpp index ae381a1..9037d3f 100644 --- a/tests/test_fft2d_fpga.cpp +++ b/tests/test_fft2d_fpga.cpp @@ -47,7 +47,7 @@ TEST(fft2dFPGATest, CorrectnessBRAM){ const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft2d_bram/fft2d_bram.aocx", false); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft2d_bram_opt/fft2d_bram_opt.aocx", false); EXPECT_EQ(isInit, 0); size_t sz = 
sizeof(float2) * N * N; From e96acbf1de5be2f87063834f8498d1c735367665 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 1 Oct 2020 13:50:24 +0200 Subject: [PATCH 30/76] CI: Adding release debug to restrict prints --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 52d1ec3..a2e326e 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,7 +10,7 @@ build-all: script: - rm -rf build - mkdir -p build && cd build - - cmake -DLOG_FFT_SIZE=6 .. + - cmake -DLOG_FFT_SIZE=6 -DCMAKE_BUILD_TYPE=Release .. - make - make fft1d_emu - make fft2d_ddr_emu From 55175812111c18832dedd80485686cb88a19605c Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 1 Oct 2020 13:56:54 +0200 Subject: [PATCH 31/76] Test: fixed path for 3d FFT DDR --- tests/test_fft3d_fpga.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_fft3d_fpga.cpp b/tests/test_fft3d_fpga.cpp index 4cfedb4..a513531 100644 --- a/tests/test_fft3d_fpga.cpp +++ b/tests/test_fft3d_fpga.cpp @@ -104,7 +104,7 @@ TEST(fftFPGATest, ValidSp3dFFTDDR){ const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr_triv/fft3d_ddr_triv.aocx", 0); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr/fft3d_ddr.aocx", 0); ASSERT_EQ(isInit, 0); size_t sz = sizeof(float2) * N * N * N; @@ -170,7 +170,7 @@ TEST(fftFPGATest, ValidSp3dFFTDDRSVMBatch){ const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr_triv/fft3d_ddr_triv.aocx", 0); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr/fft3d_ddr.aocx", 0); ASSERT_EQ(isInit, 0); // Random number of batches between 1 and 10 From eb0961d244f88c274840a26669e6c4607954c6a9 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 1 Oct 2020 16:36:35 +0200 Subject: [PATCH 32/76] modified verification function --- 
api/include/fftfpga/fftfpga.h | 19 ++++++ examples/common/helper.c | 4 +- examples/common/verify_fftw.c | 105 +++++++-------------------------- examples/common/verify_fftw.h | 4 +- examples/fft2d.c | 2 +- examples/fft3d_bram.c | 2 +- examples/fft3d_ddr.c | 2 +- examples/fft3d_ddr_svm.c | 2 +- examples/fft3d_ddr_svm_batch.c | 2 +- tests/test_fft2d_fpga.cpp | 4 +- tests/test_fft3d_fpga.cpp | 18 +++--- 11 files changed, 60 insertions(+), 104 deletions(-) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 4c46e8b..1ed7498 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -105,6 +105,17 @@ extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int ite */ extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, int interleaving, int how_many); +/** + * @brief compute an out-of-place single precision complex 2DFFT using the BRAM of the FPGA and Shared Virtual Memory for Host to Device Communication + * @param N : integer pointer to size of FFT2d + * @param inp : float2 pointer to input data of size [N * N] + * @param out : float2 pointer to output data of size [N * N] + * @param inv : int toggle to activate backward FFT + * @param how_many : number of 2D FFTs to computer, default 1 + * @return fpga_t : time taken in milliseconds for data transfers and execution + */ +extern fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv, int how_many); + /** * @brief compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA * @param N : integer pointer to size of FFT2d @@ -136,6 +147,14 @@ extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bo */ extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv); +/** + * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA and Shared Virtual Memory for Host to Device Communication + * @param N : integer pointer 
addressing the size of FFT3d + * @param inp : float2 pointer to input data of size [N * N * N] + * @param out : float2 pointer to output data of size [N * N * N] + * @param inv : int toggle to activate backward FFT + * @return fpga_t : time taken in milliseconds for data transfers and execution + */ extern fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv); extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, bool inv, int how_many); diff --git a/examples/common/helper.c b/examples/common/helper.c index 6891648..2769648 100755 --- a/examples/common/helper.c +++ b/examples/common/helper.c @@ -99,7 +99,7 @@ void display_measures(double total_api_time, double pcie_rd, double pcie_wr, dou double pcie_write = pcie_wr / iter; double exec = exec_t / iter; - double gpoints_per_sec = (pow(N, dim) / (exec * 1e-3)) * 1e-9; + double gpoints_per_sec = (batch * pow(N, dim)) / (exec * 1e-3 * 1024 * 1024 * 1024); double gBytes_per_sec = 0.0; if(sp){ @@ -109,7 +109,7 @@ void display_measures(double total_api_time, double pcie_rd, double pcie_wr, dou gBytes_per_sec *= gpoints_per_sec * 16; } - double gflops = dim * 5 * pow(N, dim) * (log((double)N)/log((double)2))/(exec * 1e-3 * 1E9); + double gflops = batch * dim * 5 * pow(N, dim) * (log((double)N)/log((double)2))/(exec * 1e-3 * 1024*1024*1024); printf("\n\n------------------------------------------\n"); printf("Measurements \n"); diff --git a/examples/common/verify_fftw.c b/examples/common/verify_fftw.c index bee2de4..286962d 100644 --- a/examples/common/verify_fftw.c +++ b/examples/common/verify_fftw.c @@ -2,24 +2,27 @@ #include #include #include +#include #include "fftfpga/fftfpga.h" #ifdef USE_FFTW #include + /** - * \brief verify FPGA fft3d with FFTW fft3d - * \param fpga_out: pointer to fpga computation of fft3d for sp complex data - * \param fftw_data: pointer to fft3d sized allocation of sp complex data for fftw cpu computation + * \brief Verify FFT computed in FPGA with FFTW + 
* \param fpga_out: pointer to FPGA computation for sp complex data + * \param fftw_data: pointer to FFT sized allocation of sp complex data for fftw cpu computation * \param N: number of points per dimension of FFT3d + * \param dim: number of dimensions of points * \param inverse: true if backward FFT - * \param how_many: batch, default is 1 + * \param how_many: default is 1 * \return true if verification passed */ -bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse, int how_many){ +bool verify_fftwf(float2 *fpgaout, float2 *verify, int N, int dim, bool inverse, int how_many){ // Copy inp data to verify using FFTW // requires allocating data specifically for FFTW computation - size_t num_pts = how_many * N * N * N; + size_t num_pts = how_many * pow(N, dim); fftwf_complex *fftw_data = fftwf_alloc_complex(num_pts); for(size_t i = 0; i < num_pts; i++){ @@ -27,21 +30,26 @@ bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse, fftw_data[i][1] = verify[i].y; } + int *n = (int*)calloc(N * dim , sizeof(int)); + for(size_t i = 0; i < dim; i++){ + n[i] = N; + } + // Compute 3d FFT using FFTW // Create Plan using simple heuristic and in place FFT fftwf_plan plan; - int rank = 3; - const int n[] = {N, N, N}; - int howmany = how_many; - int idist = N*N*N, odist = N*N*N; + //const int n[] = {N, N, N}; + //int idist = N*N*N, odist = N*N*N; + int idist = pow(N, dim); + int odist = pow(N, dim); int istride = 1, ostride = 1; // contiguous in memory if(inverse){ - plan = fftwf_plan_many_dft(rank, n, howmany, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_BACKWARD, FFTW_ESTIMATE); + plan = fftwf_plan_many_dft(dim, n, how_many, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_BACKWARD, FFTW_ESTIMATE); //plan = fftwf_plan_dft_3d( N, N, N, &fftw_data[0], &fftw_data[0], FFTW_BACKWARD, FFTW_ESTIMATE); } else{ - plan = fftwf_plan_many_dft(rank, n, howmany, &fftw_data[0], NULL, 
istride, idist, fftw_data, NULL, ostride, odist, FFTW_FORWARD, FFTW_ESTIMATE); + plan = fftwf_plan_many_dft(dim, n, how_many, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_FORWARD, FFTW_ESTIMATE); //plan = fftwf_plan_dft_3d( N, N, N, &fftw_data[0], &fftw_data[0], FFTW_FORWARD, FFTW_ESTIMATE); } @@ -83,79 +91,6 @@ bool verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse, printf("\tSignal to noise ratio on output sample: %f --> %s\n\n", db, "FAILED"); return false; } - -} - -/** - * \brief verify FPGA fft2d with FFTW fft2d - * \param fpga_out: pointer to fpga computation of fft2d for sp complex data - * \param fftw_data: pointer to fft2d sized allocation of sp complex data for fftw cpu computation - * \param N: number of points per dimension of FFT2d - * \param inverse: true if backward FFT - * \return true if verification passed - */ -bool verify_sp_fft2d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse){ - - // Copy inp data to verify using FFTW - // requires allocating data specifically for FFTW computation - size_t num_pts = N * N; - fftwf_complex *fftw_data = fftwf_alloc_complex(num_pts); - - for(size_t i = 0; i < num_pts; i++){ - fftw_data[i][0] = verify[i].x; - fftw_data[i][1] = verify[i].y; - } - - // Compute 3d FFT using FFTW - // Create Plan using simple heuristic and in place FFT - fftwf_plan plan; - - if(inverse){ - plan = fftwf_plan_dft_2d( N, N, &fftw_data[0], &fftw_data[0], FFTW_BACKWARD, FFTW_ESTIMATE); - } - else{ - plan = fftwf_plan_dft_2d( N, N, &fftw_data[0], &fftw_data[0], FFTW_FORWARD, FFTW_ESTIMATE); - } - - // Execute in place FFTW based on plan created - fftwf_execute(plan); - - // verify by calculating signal-to-noise ratio (SNR) - float mag_sum = 0, noise_sum = 0, magnitude, noise; - - for (size_t i = 0; i < num_pts; i++) { - - magnitude = fftw_data[i][0] * fftw_data[i][0] + \ - fftw_data[i][1] * fftw_data[i][1]; - noise = (fftw_data[i][0] - fpgaout[i].x) \ - * (fftw_data[i][0] - 
fpgaout[i].x) + - (fftw_data[i][1] - fpgaout[i].y) * (fftw_data[i][1] - fpgaout[i].y); - - mag_sum += magnitude; - noise_sum += noise; -#ifndef NDEBUG - printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); -#endif - } - - // Calculate SNR - float db = 10 * log(mag_sum / noise_sum) / log(10.0); - - // Free FFTW data - fftwf_free(fftw_data); - - // destroy plan - fftwf_destroy_plan(plan); - - // if SNR greater than 120, verification passes - if(db > 120){ - return true; - } - else{ - printf("\tSignal to noise ratio on output sample: %f --> %s\n\n", db, "FAILED"); - return false; - } - } #endif // USE_FFTW \ No newline at end of file diff --git a/examples/common/verify_fftw.h b/examples/common/verify_fftw.h index 098f1f1..4a7c759 100644 --- a/examples/common/verify_fftw.h +++ b/examples/common/verify_fftw.h @@ -3,8 +3,8 @@ #ifndef FFT3D_FFTW_H #define FFT3D_FFTW_H -int verify_sp_fft2d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse); +#include -int verify_sp_fft3d_fftw(float2 *fpgaout, float2 *verify, int N, bool inverse, int how_many); +bool verify_fftwf(float2 *fpgaout, float2 *verify, int N, int dim, bool inverse, int how_many); #endif // FFT3D_FFTW_H \ No newline at end of file diff --git a/examples/fft2d.c b/examples/fft2d.c index cf4b0f4..6483b7e 100644 --- a/examples/fft2d.c +++ b/examples/fft2d.c @@ -94,7 +94,7 @@ int main(int argc, const char **argv) { } #ifdef USE_FFTW - if(!verify_sp_fft2d_fftw(out, inp, N, inv)){ + if(!verify_fftwf(out, inp, N, 2, inv, 1)){ fprintf(stderr, "2d FFT Verification Failed \n"); free(inp); free(out); diff --git a/examples/fft3d_bram.c b/examples/fft3d_bram.c index 378d29f..879b651 100755 --- a/examples/fft3d_bram.c +++ b/examples/fft3d_bram.c @@ -88,7 +88,7 @@ int main(int argc, const char **argv) { total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW - if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ + if(!verify_fftwf(out, inp, N, 3, inv, 
1)){ fprintf(stderr, "3d FFT Verification Failed \n"); free(inp); free(out); diff --git a/examples/fft3d_ddr.c b/examples/fft3d_ddr.c index 59d8ae1..dfae683 100755 --- a/examples/fft3d_ddr.c +++ b/examples/fft3d_ddr.c @@ -92,7 +92,7 @@ int main(int argc, const char **argv) { total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW - if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ + if(!verify_fftwf(out, inp, N, 3, inv, 1)){ fprintf(stderr, "3d FFT Verification Failed \n"); free(inp); free(out); diff --git a/examples/fft3d_ddr_svm.c b/examples/fft3d_ddr_svm.c index 59cc32d..3310682 100755 --- a/examples/fft3d_ddr_svm.c +++ b/examples/fft3d_ddr_svm.c @@ -87,7 +87,7 @@ int main(int argc, const char **argv) { total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW - if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ + if(!verify_fftwf(out, inp, N, 3, inv, 1)){ fprintf(stderr, "3d FFT Verification Failed \n"); free(inp); free(out); diff --git a/examples/fft3d_ddr_svm_batch.c b/examples/fft3d_ddr_svm_batch.c index b7d43cf..e93ef1c 100755 --- a/examples/fft3d_ddr_svm_batch.c +++ b/examples/fft3d_ddr_svm_batch.c @@ -89,7 +89,7 @@ int main(int argc, const char **argv) { total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW - if(!verify_sp_fft3d_fftw(out, inp, N, inv, batch)){ + if(!verify_fftwf(out, inp, N, 3, inv, batch)){ fprintf(stderr, "3d FFT Verification Failed \n"); free(inp); free(out); diff --git a/tests/test_fft2d_fpga.cpp b/tests/test_fft2d_fpga.cpp index 9037d3f..721d0be 100644 --- a/tests/test_fft2d_fpga.cpp +++ b/tests/test_fft2d_fpga.cpp @@ -58,7 +58,7 @@ TEST(fft2dFPGATest, CorrectnessBRAM){ fft_time = fftfpgaf_c2c_2d_bram(N, inp, out, 0, 0, 1); - int result = verify_sp_fft2d_fftw(out, inp, N, 0); + int result = verify_fftwf(out, inp, N, 2, 0, 1); EXPECT_EQ(result, 1); @@ -115,7 +115,7 @@ TEST(fftFPGATest, ValidSp2dFFTDDR){ fft_time = fftfpgaf_c2c_2d_ddr(N, inp, out, 0); - int result = verify_sp_fft2d_fftw(out, 
inp, N, 0); + int result = verify_fftwf(out, inp, N, 2, 0, 1); EXPECT_EQ(result, 1); diff --git a/tests/test_fft3d_fpga.cpp b/tests/test_fft3d_fpga.cpp index a513531..2970b6b 100644 --- a/tests/test_fft3d_fpga.cpp +++ b/tests/test_fft3d_fpga.cpp @@ -3,6 +3,7 @@ #include "gtest/gtest.h" // finds this because gtest is linked #include // malloc, free #include +#include #ifdef USE_FFTW #include #endif @@ -58,7 +59,7 @@ TEST(fft3dFPGATest, CorrectnessBRAM){ fft_time = fftfpgaf_c2c_3d_bram(N, inp, out, 0, 0); - int result = verify_sp_fft3d_fftw(out, inp, N, 0, 1); + int result = verify_fftwf(out, inp, N, 3, 0, 1); EXPECT_EQ(result, 1); @@ -115,7 +116,7 @@ TEST(fftFPGATest, ValidSp3dFFTDDR){ fft_time = fftfpgaf_c2c_3d_ddr(N, inp, out, 0); - int result = verify_sp_fft3d_fftw(out, inp, N, 0, 1); + int result = verify_fftwf(out, inp, N, 3, 0, 1); EXPECT_EQ(result, 1); @@ -163,18 +164,19 @@ TEST(fft3dFPGATest, InputValidityDDRSVMBatch){ /** * \brief fftfpgaf_c2c_3d_ddr_svm_batch() */ -TEST(fftFPGATest, ValidSp3dFFTDDRSVMBatch){ +TEST(fft3dFPGATest, ValidSp3dFFTDDRSVMBatch){ // check correctness of output for a random number of batches #ifdef USE_FFTW // malloc data to input const int N = (1 << 6); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr/fft3d_ddr.aocx", 0); + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr/fft3d_ddr.aocx", true); ASSERT_EQ(isInit, 0); // Random number of batches between 1 and 10 - int how_many = (rand() % 10) + 1; + int how_many = 2; + //int how_many = (rand() % 10) + 1; size_t sz = sizeof(float2) * N * N * N * how_many; unsigned num_pts = how_many * N * N * N; @@ -183,11 +185,11 @@ TEST(fftFPGATest, ValidSp3dFFTDDRSVMBatch){ fftf_create_data(inp, num_pts); - fft_time = fftfpgaf_c2c_3d_ddr_svm_batch(N, inp, out, 0, how_many); + fft_time = fftfpgaf_c2c_3d_ddr_svm_batch(N, inp, out, false, how_many); - int result = verify_sp_fft3d_fftw(out, inp, N, 0, how_many); + bool result = 
verify_fftwf(out, inp, N, 3, false, how_many); - EXPECT_EQ(result, 1); + EXPECT_TRUE(result); free(inp); free(out); From 07dba1f4c3c70a8a187b468dc852ff81684b687c Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 6 Oct 2020 12:50:32 +0200 Subject: [PATCH 33/76] Working DDR batch of 2 --- api/include/fftfpga/fftfpga.h | 8 +- api/src/fftfpga.c | 605 +++++++++++++++++++++++++++++++++- examples/CMakeLists.txt | 2 +- tests/CMakeLists.txt | 2 +- 4 files changed, 606 insertions(+), 11 deletions(-) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 1ed7498..47a8206 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -99,11 +99,11 @@ extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int ite * @param inp : float2 pointer to input data of size [N * N] * @param out : float2 pointer to output data of size [N * N] * @param inv : int toggle to activate backward FFT - * @param interleaving : 1 if interleaved global memory buffers + * @param interleaving : enable interleaved global memory buffers * @param how_many : number of 2D FFTs to computer, default 1 * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, int interleaving, int how_many); +extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many); /** * @brief compute an out-of-place single precision complex 2DFFT using the BRAM of the FPGA and Shared Virtual Memory for Host to Device Communication @@ -132,7 +132,7 @@ extern fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv); * @param inp : float2 pointer to input data of size [N * N * N] * @param out : float2 pointer to output data of size [N * N * N] * @param inv : int toggle to activate backward FFT - * @param interleaving : 1 if using burst interleaved global memory buffers + * @param interleaving : enable 
burst interleaved global memory buffers * @return fpga_t : time taken in milliseconds for data transfers and execution */ extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving); @@ -147,6 +147,8 @@ extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bo */ extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv); +extern fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many); + /** * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA and Shared Virtual Memory for Host to Device Communication * @param N : integer pointer addressing the size of FFT3d diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 3979325..67d83c6 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -26,7 +26,7 @@ static cl_command_queue queue4 = NULL, queue5 = NULL, queue6 = NULL; static cl_command_queue queue7 = NULL, queue8 = NULL; //static int svm_handle; -static int svm_enabled = 0; +static bool svm_enabled = false; #endif static void queue_setup(); @@ -110,7 +110,7 @@ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ } else{ printf("Supports SVM \n"); - svm_enabled = 1; + svm_enabled = true; } } @@ -398,7 +398,7 @@ fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch) int num_pts = N * batch; // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ return fft_time; } @@ -644,10 +644,10 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ * \param inp : float2 pointer to input data of size [N * N] * \param out : float2 pointer to output data of size [N * N] * \param inv : int toggle to activate backward FFT - * \param interleaving : 1 if interleaved global memory buffers + * \param interleaving : enable interleaved global memory 
buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, int interleaving, int how_many){ +fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; cl_kernel fetch_kernel = NULL, store_kernel = NULL; @@ -805,6 +805,173 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, int inter return fft_time; } +/** + * \brief compute an out-of-place single precision complex 2DFFT using the BRAM of the FPGA and Shared Virtual Memory for Host to Device Communication + * \param N : integer pointer to size of FFT2d + * \param inp : float2 pointer to input data of size [N * N] + * \param out : float2 pointer to output data of size [N * N] + * \param inv : int toggle to activate backward FFT + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv, int how_many){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = how_many * N * N; + + cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; + cl_kernel fetch_kernel = NULL, store_kernel = NULL; + cl_kernel transpose_kernel = NULL; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (!svm_enabled)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 2d FFT transform in BRAM using SVM\n", inv ? 
" inverse":""); +#endif + + queue_setup(); + + // allocate SVM buffers + float2 *h_inData, *h_outData; + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_inData[i].x = inp[i].x; + h_inData[i].y = inp[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_outData[i].x = 0.0; + h_outData[i].y = 0.0; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + ffta_kernel = clCreateKernel(program, "fft2da", &status); + checkError(status, "Failed to create fft2da kernel"); + + fftb_kernel = clCreateKernel(program, "fft2db", &status); + checkError(status, "Failed to create fft2db kernel"); + + fetch_kernel = clCreateKernel(program, "fetchBitrev", &status); + checkError(status, "Failed to create fetch kernel"); + + transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose1 kernel"); + + store_kernel = clCreateKernel(program, "transposeStore", &status); + checkError(status, "Failed to create store kernel"); + + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData); + checkError(status, "Failed to set fetch1 
kernel arg"); + + status = clSetKernelArg(fetch_kernel, 1, sizeof(cl_int), (void *)&how_many); + checkError(status, "Failed to set fetch kernel arg 1"); + + status = clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg 0"); + + status = clSetKernelArg(ffta_kernel, 1, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set ffta kernel arg 1"); + + status = clSetKernelArg(transpose_kernel, 0, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set transpose kernel arg 0"); + + status = clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg 0"); + + status = clSetKernelArg(fftb_kernel, 1, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set fftb kernel arg 1"); + + // kernel stores using SVM based PCIe to host + status = clSetKernelArgSVMPointer(store_kernel, 0, (void*)h_outData); + checkError(status, "Failed to set store2 kernel arg"); + + status = clSetKernelArg(store_kernel, 1, sizeof(cl_int), (void *)&how_many); + checkError(status, "Failed to set store kernel arg"); + + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose1 kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch store kernel"); + + // Wait for all command queues to complete pending events + status = clFinish(queue5); + checkError(status, "failed to finish queue5"); + status = clFinish(queue1); + 
checkError(status, "failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "failed to finish queue2"); + status = clFinish(queue3); + checkError(status, "failed to finish queue3"); + status = clFinish(queue4); + checkError(status, "failed to finish queue4"); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + for(size_t i = 0; i < num_pts; i++){ + out[i].x = h_outData[i].x; + out[i].y = h_outData[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); + + queue_cleanup(); + + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + if(store_kernel) + clReleaseKernel(store_kernel); + + fft_time.valid = 1; + return fft_time; +} + /** * \brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA * \param N : integer pointer addressing the size of FFT3d @@ -1003,7 +1170,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { */ // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ return fft_time; } @@ -1094,6 +1261,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { checkError(status, "Failed to set store2 kernel arg"); fft_time.exec_t = getTimeinMilliSec(); + double first_half = getTimeinMilliSec(); status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); @@ -1109,6 +1277,19 @@ fpga_t 
fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second transpose kernel"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + first_half = getTimeinMilliSec() - first_half; + + double second_half = getTimeinMilliSec(); // enqueue fetch to same queue as the store kernel due to data dependency status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); @@ -1125,12 +1306,17 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { checkError(status, "failed to finish"); status = clFinish(queue3); checkError(status, "failed to finish"); + /* status = clFinish(queue2); checkError(status, "failed to finish"); status = clFinish(queue1); checkError(status, "failed to finish"); + */ + second_half = getTimeinMilliSec() - second_half; fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + printf("First half: %lf Second half: %lf\n\n", first_half, second_half); status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); @@ -1348,6 +1534,413 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { return fft_time; } +/** + * \brief compute an batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose + * \param N : integer pointer addressing the size of FFT3d + * \param inp : float2 pointer to input data of size [N * N * N] + * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param interleaving : 
enable burst interleaved global memory buffers + * \param how_many : number of batched computations + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = N * N * N; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 0)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 3d FFT transform in DDR for Batched execution\n", inv ? " inverse":""); +#endif + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Setup kernels + cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); + checkError(status, "Failed to create fetch1 kernel"); + cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose kernel"); + cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); + checkError(status, "Failed to create store1 kernel"); + + cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); + checkError(status, "Failed to create fetch2 kernel"); + cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); + checkError(status, "Failed to create store2 kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // Device memory buffers: using 1st and 2nd banks + // Double Buffers, using 3rd and 4th banks + cl_mem d_inData1, d_inData2, d_outData1, 
d_outData2; + d_inData1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_inData2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_outData1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_outData2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + cl_mem d_transpose; + d_transpose = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // Default Kernel Arguments + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); + checkError(status, "Failed to set fetch1 kernel arg"); + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); + checkError(status, "Failed to set store2 kernel arg"); + + fft_time.exec_t 
= getTimeinMilliSec(); + + // First Phase + // Write to DDR first buffer + status = clEnqueueWriteBuffer(queue1, d_inData1, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish"); + + // Second Phase + // Unblocking write to DDR second buffer from index num_pts + cl_event write_event[2]; + status = clEnqueueWriteBuffer(queue6, d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, (void*)&inp[num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to write to DDR buffer"); + + // Compute First FFT already transferred + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + // Check finish of transfer and computations + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status 
= clFinish(queue1); + checkError(status, "failed to finish"); + + clWaitForEvents(1, &write_event[0]); + clReleaseEvent(write_event[0]); + + status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, out, 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); + checkError(status, "Failed to set store2 kernel arg"); + + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + // Check finish of transfer and computations + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = 
clFinish(queue1); + checkError(status, "failed to finish"); + + clWaitForEvents(1, &write_event[0]); + clReleaseEvent(write_event[0]); + + status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[num_pts], 0, NULL, &write_event[1]); + checkError(status, "Failed to read from DDR buffer"); + + status = clFinish(queue6); + checkError(status, "failed to finish reading DDR using PCIe"); + + clWaitForEvents(1, &write_event[1]); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + checkError(status, "Failed to copy data from device"); + + clReleaseEvent(write_event[1]); + queue_cleanup(); + + // Loop over the 3 stages + /* + for(size_t i = 2; i < how_many - 1; i++){ + + // Unblocking transfers between DDR and host + if( (i % 2) == 0){ + status = clEnqueueWriteBuffer(queue7, d_inData1, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[(i * num_pts)], 0, NULL, &write_event[0]); + checkError(status, "Failed to write to DDR buffer"); + + status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[((i - 2) * num_pts)], 0, NULL, &write_event[1]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); + checkError(status, "Failed to set store2 kernel arg"); + } + else{ + status = clEnqueueWriteBuffer(queue7, d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[(i * num_pts)], 0, NULL, &write_event[0]); + checkError(status, "Failed to write to DDR buffer"); + + status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[((i - 2) * num_pts)], 0, NULL, &write_event[1]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); + checkError(status, "Failed to set fetch1 kernel arg"); 
+ + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); + checkError(status, "Failed to set store2 kernel arg"); + } + + // Set Kernel Arguments before execution + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clFinish(queue7); + checkError(status, "failed to finish"); + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + + clWaitForEvents(2, write_event); + clReleaseEvent(write_event[0]); + clReleaseEvent(write_event[1]); + } + + if( (N % 2) == 1){ + status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR 
buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); + checkError(status, "Failed to set store2 kernel arg"); + } + else{ + status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); + checkError(status, "Failed to set store2 kernel arg"); + } + + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = 
clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + if( (how_many % 2) == 0){ + status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[1]); + checkError(status, "Failed to read from DDR buffer"); + } + else{ + status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[1]); + checkError(status, "Failed to read from DDR buffer"); + } + + status = clFinish(queue6); + checkError(status, "failed to finish reading DDR using PCIe"); + + clWaitForEvents(2, write_event); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + checkError(status, "Failed to copy data from device"); + + + clReleaseEvent(write_event[0]); + clReleaseEvent(write_event[1]); + queue_cleanup(); + */ + + if (d_inData1) + clReleaseMemObject(d_inData1); + if (d_inData2) + clReleaseMemObject(d_inData2); + + if (d_outData2) + clReleaseMemObject(d_outData2); + if (d_outData2) + clReleaseMemObject(d_outData2); + + if (d_transpose) + clReleaseMemObject(d_transpose); + + if(fetch1_kernel) + clReleaseKernel(fetch1_kernel); + if(fetch2_kernel) + clReleaseKernel(fetch2_kernel); + + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(fftc_kernel) + clReleaseKernel(fftc_kernel); + + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + + if(store1_kernel) + clReleaseKernel(store1_kernel); + if(store2_kernel) + clReleaseKernel(store2_kernel); + + fft_time.valid = 1; + return fft_time; +} + +/** + * \brief compute an batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose and for data transfers between host's main memory and FPGA using Shared Virtual Memory + * \param N : integer pointer addressing 
the size of FFT3d + * \param inp : float2 pointer to input data of size [N * N * N] + * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param how_many : number of batched computations + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, bool inv, int how_many) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 00b269f..3986922 100755 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,7 +4,7 @@ project(examplesfftfpga VERSION 0.1 DESCRIPTION "Example Code that uses libfftfpga" LANGUAGES C CXX) -set(examples fft3d_ddr fft3d_bram fft3d_ddr_svm fft3d_ddr_svm_batch fft2d fft1d fft1d_svm) +set(examples fft3d_ddr fft3d_bram fft3d_ddr_svm fft3d_ddr_batch fft3d_ddr_svm_batch fft2d fft1d fft1d_svm) # create a target for each of the example foreach(example ${examples}) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 816623d..f215603 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -37,7 +37,7 @@ endif() add_dependencies(test_fftfpga fft3d_bram_emu) add_dependencies(test_fftfpga fft3d_ddr_emu) -add_dependencies(test_fftfpga fft2d_bram_emu) +add_dependencies(test_fftfpga fft2d_bram_opt_emu) add_dependencies(test_fftfpga fft2d_ddr_emu) add_dependencies(test_fftfpga fft1d_emu) From 19ff6a6616a66d6a8cef2c641b8bc2aa8d8f6781 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 6 Oct 2020 19:36:03 +0200 Subject: [PATCH 34/76] working DDR batch for all --- api/src/fftfpga.c | 103 ++++++++++------------------------------------ 1 file changed, 21 insertions(+), 82 deletions(-) diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 67d83c6..b27a82a 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -1550,7 +1550,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, 
bool inv, bool int num_pts = N * N * N; // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 0)){ + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 1)){ return fft_time; } @@ -1676,81 +1676,18 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool clWaitForEvents(1, &write_event[0]); clReleaseEvent(write_event[0]); - status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, out, 0, NULL, &write_event[0]); - checkError(status, "Failed to read from DDR buffer"); - - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); - checkError(status, "Failed to set fetch1 kernel arg"); - - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); - checkError(status, "Failed to set store2 kernel arg"); - - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - // Check finish of transfer and computations - status = clFinish(queue6); - checkError(status, "failed to finish"); - status = clFinish(queue5); 
- checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue1); - checkError(status, "failed to finish"); - - clWaitForEvents(1, &write_event[0]); - clReleaseEvent(write_event[0]); - - status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[num_pts], 0, NULL, &write_event[1]); - checkError(status, "Failed to read from DDR buffer"); - - status = clFinish(queue6); - checkError(status, "failed to finish reading DDR using PCIe"); - - clWaitForEvents(1, &write_event[1]); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - checkError(status, "Failed to copy data from device"); - - clReleaseEvent(write_event[1]); - queue_cleanup(); - // Loop over the 3 stages - /* - for(size_t i = 2; i < how_many - 1; i++){ + + for(size_t i = 2; i < how_many; i++){ // Unblocking transfers between DDR and host if( (i % 2) == 0){ - status = clEnqueueWriteBuffer(queue7, d_inData1, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[(i * num_pts)], 0, NULL, &write_event[0]); - checkError(status, "Failed to write to DDR buffer"); - - status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[((i - 2) * num_pts)], 0, NULL, &write_event[1]); + status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[((i - 2) * num_pts)], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); + status = clEnqueueWriteBuffer(queue7, d_inData1, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[(i * num_pts)], 0, NULL, &write_event[1]); + checkError(status, "Failed to write to DDR buffer"); + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); checkError(status, "Failed to set fetch1 kernel arg"); @@ -1758,12 +1695,12 @@ fpga_t 
fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool checkError(status, "Failed to set store2 kernel arg"); } else{ - status = clEnqueueWriteBuffer(queue7, d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[(i * num_pts)], 0, NULL, &write_event[0]); - checkError(status, "Failed to write to DDR buffer"); - - status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[((i - 2) * num_pts)], 0, NULL, &write_event[1]); + status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[((i - 2) * num_pts)], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); + status = clEnqueueWriteBuffer(queue7, d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[(i * num_pts)], 0, NULL, &write_event[1]); + checkError(status, "Failed to write to DDR buffer"); + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); checkError(status, "Failed to set fetch1 kernel arg"); @@ -1816,7 +1753,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool clReleaseEvent(write_event[1]); } - if( (N % 2) == 1){ + if( (how_many % 2) == 0){ status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); @@ -1873,27 +1810,29 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool checkError(status, "failed to finish"); status = clFinish(queue1); checkError(status, "failed to finish"); + + clWaitForEvents(1, &write_event[0]); + clReleaseEvent(write_event[0]); + if( (how_many % 2) == 0){ - status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[1]); + status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, 
&write_event[0]); checkError(status, "Failed to read from DDR buffer"); } else{ - status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[1]); + status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); } status = clFinish(queue6); checkError(status, "failed to finish reading DDR using PCIe"); - clWaitForEvents(2, write_event); + clWaitForEvents(1, &write_event[0]); + clReleaseEvent(write_event[0]); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; checkError(status, "Failed to copy data from device"); - - clReleaseEvent(write_event[0]); - clReleaseEvent(write_event[1]); queue_cleanup(); - */ if (d_inData1) clReleaseMemObject(d_inData1); From 9e2cf206719e9f2a9e0cb74d0e9fd595e0ea3274 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 12 Oct 2020 23:20:05 +0200 Subject: [PATCH 35/76] Split to individual files --- api/CMakeLists.txt | 3 + api/src/fft1d.c | 371 +++++++++ api/src/fft2d.c | 481 ++++++++++++ api/src/fft3d.c | 1352 ++++++++++++++++++++++++++++++++ api/src/fftfpga.c | 1787 ++---------------------------------------- api/src/fpga_state.h | 23 + 6 files changed, 2282 insertions(+), 1735 deletions(-) create mode 100644 api/src/fft1d.c create mode 100644 api/src/fft2d.c create mode 100644 api/src/fft3d.c create mode 100644 api/src/fpga_state.h diff --git a/api/CMakeLists.txt b/api/CMakeLists.txt index 645749d..2e8cb26 100755 --- a/api/CMakeLists.txt +++ b/api/CMakeLists.txt @@ -10,6 +10,9 @@ project(fftfpga VERSION 0.3 ## add_library(${PROJECT_NAME} STATIC ${PROJECT_SOURCE_DIR}/src/fftfpga.c + ${PROJECT_SOURCE_DIR}/src/fft3d.c + ${PROJECT_SOURCE_DIR}/src/fft2d.c + ${PROJECT_SOURCE_DIR}/src/fft1d.c ${PROJECT_SOURCE_DIR}/src/svm.c ${PROJECT_SOURCE_DIR}/src/opencl_utils.c ${PROJECT_SOURCE_DIR}/src/misc.c) diff --git 
a/api/src/fft1d.c b/api/src/fft1d.c new file mode 100644 index 0000000..efa9da9 --- /dev/null +++ b/api/src/fft1d.c @@ -0,0 +1,371 @@ +// Author: Arjun Ramaswami + +#include +#include +#include +#include +#include +#define CL_VERSION_2_0 +#include // to disable interleaving & transfer data to specific banks - CL_CHANNEL_1_INTELFPGA +#include "CL/opencl.h" + +#include "fpga_state.h" +#include "fftfpga/fftfpga.h" +#include "svm.h" +#include "opencl_utils.h" +#include "misc.h" + +/** + * \brief compute an out-of-place double precision complex 1D-FFT on the FPGA + * \param N : integer pointer to size of FFT3d + * \param inp : double2 pointer to input data of size N + * \param out : double2 pointer to output data of size N + * \param inv : int toggle to activate backward FFT + * \param batch : number of batched executions of 1D FFT + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_kernel fetch_kernel = NULL, fft_kernel = NULL; + cl_int status = 0; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ((N & (N-1)) !=0)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s FFT transform of %d batches \n", inv ? 
" inverse":"", batch); +#endif + + queue_setup(); + + cl_mem d_inData, d_outData; + d_inData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(double2) * N * batch, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(double2) * N * batch, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // Copy data from host to device + fft_time.pcie_write_t = getTimeinMilliSec(); + + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(double2) * N * batch, inp, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish writing buffer using PCIe"); + + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; + checkError(status, "Failed to copy data to device"); + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Create Kernels - names must match the kernel name in the original CL file + fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); + + fft_kernel = clCreateKernel(program, "fft1d", &status); + checkError(status, "Failed to create fft1d kernel"); + // Set the kernel arguments + // from here + status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + checkError(status, "Failed to set fetch_kernel arg 0"); + status = clSetKernelArg(fft_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set fft_kernel arg 0"); + status = clSetKernelArg(fft_kernel, 1, sizeof(cl_int), (void*)&batch); + checkError(status, "Failed to set fft_kernel arg 1"); + status = clSetKernelArg(fft_kernel, 2, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fft_kernel arg 2"); + + printf(inverse_int ? 
"\tInverse FFT" : "\tFFT"); + printf(" kernel initialization is complete.\n"); + + size_t ls = N/8; + size_t gs = batch * ls; + + // Measure execution time + fft_time.exec_t = getTimeinMilliSec(); + + // FFT1d kernel is the SWI kernel + status = clEnqueueTask(queue1, fft_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft1d kernel"); + + status = clEnqueueNDRangeKernel(queue2, fetch_kernel, 1, NULL, &gs, &ls, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + // Wait for command queue to complete pending events + status = clFinish(queue1); + checkError(status, "Failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "Failed to finish queue2"); + + // Record execution time + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + // Copy results from device to host + fft_time.pcie_read_t = getTimeinMilliSec(); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; + checkError(status, "Failed to copy data from device"); + + // Cleanup + if (d_inData) + clReleaseMemObject(d_inData); + if (d_outData) + clReleaseMemObject(d_outData); + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(fft_kernel) + clReleaseKernel(fft_kernel); + queue_cleanup(); + fft_time.valid = 1; + return fft_time; +} + +/** + * \brief compute an out-of-place single precision complex 1D-FFT on the FPGA + * \param N : integer pointer to size of FFT3d + * \param inp : float2 pointer to input data of size N + * \param out : float2 pointer to output data of size N + * \param inv : true for backward transforms + * \param batch : number of batched executions of 1D FFT + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool 
inv, int batch){ + + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_kernel kernel1 = NULL, kernel2 = NULL; + cl_int status = 0; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s FFT transform for %d batch \n", inv ? " inverse":"", batch); +#endif + + queue_setup(); + + cl_mem d_inData, d_outData; + printf("Launching%s FFT transform for %d batch \n", inv ? " inverse":"", batch); + + // Create device buffers - assign the buffers in different banks for more efficient memory access + d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float2) * N * batch, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * batch, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // Copy data from host to device + fft_time.pcie_write_t = getTimeinMilliSec(); + + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * batch, inp, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish writing buffer using PCIe"); + + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; + checkError(status, "Failed to copy data to device"); + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Create Kernels - names must match the kernel name in the original CL file + kernel1 = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); + + kernel2 = clCreateKernel(program, "fft1d", &status); + checkError(status, "Failed to create fft1d kernel"); + // Set the kernel arguments + status = clSetKernelArg(kernel1, 0, sizeof(cl_mem), (void *)&d_inData); + checkError(status, "Failed to set kernel1 arg 0"); + status = clSetKernelArg(kernel2, 0, sizeof(cl_mem), (void *)&d_outData); + 
checkError(status, "Failed to set kernel arg 0"); + status = clSetKernelArg(kernel2, 1, sizeof(cl_int), (void*)&batch); + checkError(status, "Failed to set kernel arg 1"); + status = clSetKernelArg(kernel2, 2, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set kernel arg 2"); + + printf(inverse_int ? "\tInverse FFT" : "\tFFT"); + printf(" kernel initialization is complete.\n"); + + size_t ls = N/8; + size_t gs = batch * ls; + + // Measure execution time + fft_time.exec_t = getTimeinMilliSec(); + + // Launch the kernel - we launch a single work item hence enqueue a task + // FFT1d kernel is the SWI kernel + status = clEnqueueTask(queue1, kernel2, 0, NULL, NULL); + checkError(status, "Failed to launch fft1d kernel"); + + status = clEnqueueNDRangeKernel(queue2, kernel1, 1, NULL, &gs, &ls, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + // Wait for command queue to complete pending events + status = clFinish(queue1); + checkError(status, "Failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "Failed to finish queue2"); + + // Record execution time + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + // Copy results from device to host + fft_time.pcie_read_t = getTimeinMilliSec(); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; + checkError(status, "Failed to copy data from device"); + + // Cleanup + if (d_inData) + clReleaseMemObject(d_inData); + if (d_outData) + clReleaseMemObject(d_outData); + if(kernel1) + clReleaseKernel(kernel1); + if(kernel2) + clReleaseKernel(kernel2); + queue_cleanup(); + + fft_time.valid = 1; + return fft_time; +} + +/** + * \brief compute an out-of-place single precision complex 1D-FFT on the FPGA using Shared Virtual Memory for 
data transfers between host's main memory and FPGA + * \param N : integer pointer to size of FFT3d + * \param inp : float2 pointer to input data of size N + * \param out : float2 pointer to output data of size N + * \param inv : int toggle to activate backward FFT + * \param batch : number of batched executions of 1D FFT + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = N * batch; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 1D FFT transform in DDR \n", inv ? " inverse":""); +#endif + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Setup kernels + cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch1 kernel"); + cl_kernel fft_kernel = clCreateKernel(program, "fft1d", &status); + checkError(status, "Failed to create fft3da kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // allocate SVM buffers + float2 *h_inData, *h_outData; + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_inData[i].x = inp[i].x; + h_inData[i].y = inp[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, 
NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_outData[i].x = 0.0; + h_outData[i].y = 0.0; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData); + checkError(status, "Failed to set fetch kernel arg"); + + // kernel transforms and stores to DDR memory + status = clSetKernelArgSVMPointer(fft_kernel, 0, (void *)h_outData); + checkError(status, "Failed to set store2 kernel arg"); + + status=clSetKernelArg(fft_kernel, 1, sizeof(cl_int), (void*)&batch); + checkError(status, "Failed to set fft kernel arg"); + + status=clSetKernelArg(fft_kernel, 2, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fft kernel arg"); + + size_t ls = N/8; + size_t gs = batch * ls; + + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fft_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueNDRangeKernel(queue2, fetch_kernel, 1, NULL, &gs, &ls, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + for(size_t i = 0; i < num_pts; i++){ + out[i].x = h_outData[i].x; + out[i].y = h_outData[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); + + queue_cleanup(); + 
+ if(fetch_kernel) + clReleaseKernel(fetch_kernel); + + if(fft_kernel) + clReleaseKernel(fft_kernel); + + fft_time.valid = 1; + return fft_time; +} + + diff --git a/api/src/fft2d.c b/api/src/fft2d.c new file mode 100644 index 0000000..9a0ad0c --- /dev/null +++ b/api/src/fft2d.c @@ -0,0 +1,481 @@ +// Author: Arjun Ramaswami + +#include +#include +#include +#include +#include +#define CL_VERSION_2_0 +#include // to disable interleaving & transfer data to specific banks - CL_CHANNEL_1_INTELFPGA +#include "CL/opencl.h" + +#include "fpga_state.h" +#include "fftfpga/fftfpga.h" +#include "svm.h" +#include "opencl_utils.h" +#include "misc.h" + +/** + * \brief compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA + * \param N : integer pointer to size of FFT2d + * \param inp : float2 pointer to input data of size [N * N] + * \param out : float2 pointer to output data of size [N * N] + * \param inv : int toggle to activate backward FFT + * \param iter : int toggle to activate backward FFT + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_kernel fetch_kernel = NULL, fft_kernel = NULL, transpose_kernel = NULL; + cl_int status = 0; + int mangle_int = 0; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 2d FFT transform \n", inv ? 
" inverse":""); +#endif + + queue_setup(); + + cl_mem d_inData, d_outData, d_tmp; + + d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float2) * N * N, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float2) * N * N, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + d_tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * N, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // Copy data from host to device + fft_time.pcie_write_t = getTimeinMilliSec(); + + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N, inp, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish writing buffer using PCIe"); + + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; + checkError(status, "Failed to copy data to device"); + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Create Kernels - names must match the kernel name in the original CL file + fft_kernel = clCreateKernel(program, "fft2d", &status); + checkError(status, "Failed to create kernel"); + fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create kernel"); + transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create kernel"); + + // Record execution time + fft_time.exec_t = getTimeinMilliSec(); + + // Loop twice over the kernels + for (size_t i = 0; i < 2; i++) { + + // Set the kernel arguments + status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), i == 0 ? 
(void *)&d_inData : (void *)&d_tmp); + checkError(status, "Failed to set kernel arg 0"); + status = clSetKernelArg(fetch_kernel, 1, sizeof(cl_int), (void*)&mangle_int); + checkError(status, "Failed to set kernel arg 1"); + size_t lws_fetch[] = {N}; + size_t gws_fetch[] = {N * N / 8}; + status = clEnqueueNDRangeKernel(queue1, fetch_kernel, 1, 0, gws_fetch, lws_fetch, 0, NULL, NULL); + checkError(status, "Failed to launch kernel"); + + // Launch the fft kernel - we launch a single work item hence enqueue a task + status = clSetKernelArg(fft_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set kernel arg 0"); + status = clEnqueueTask(queue2, fft_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch kernel"); + + // Set the kernel arguments + status = clSetKernelArg(transpose_kernel, 0, sizeof(cl_mem), i == 0 ? (void *)&d_tmp : (void *)&d_outData); + checkError(status, "Failed to set kernel arg 0"); + + status = clSetKernelArg(transpose_kernel, 1, sizeof(cl_int), (void*)&mangle_int); + checkError(status, "Failed to set kernel arg 1"); + + size_t lws_transpose[] = {N}; + size_t gws_transpose[] = {N * N / 8}; + status = clEnqueueNDRangeKernel(queue3, transpose_kernel, 1, 0, gws_transpose, lws_transpose, 0, NULL, NULL); + checkError(status, "Failed to launch kernel"); + + // Wait for all command queues to complete pending events + status = clFinish(queue1); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + } + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + // Copy results from device to host + fft_time.pcie_read_t = getTimeinMilliSec(); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + + fft_time.pcie_read_t = 
getTimeinMilliSec() - fft_time.pcie_read_t; + checkError(status, "Failed to copy data from device"); + + // Cleanup + if (d_inData) + clReleaseMemObject(d_inData); + if (d_outData) + clReleaseMemObject(d_outData); + if (d_tmp) + clReleaseMemObject(d_tmp); + if(fft_kernel) + clReleaseKernel(fft_kernel); + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + queue_cleanup(); + + fft_time.valid = 1; + return fft_time; +} + +/** + * \brief compute an out-of-place single precision complex 2D-FFT using the BRAM of the FPGA + * \param N : integer pointer to size of FFT2d + * \param inp : float2 pointer to input data of size [N * N] + * \param out : float2 pointer to output data of size [N * N] + * \param inv : int toggle to activate backward FFT + * \param interleaving : enable interleaved global memory buffers + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; + cl_kernel fetch_kernel = NULL, store_kernel = NULL; + cl_kernel transpose_kernel = NULL; + + cl_int status = 0; + int num_pts = how_many * N * N; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); +#endif + + queue_setup(); + + cl_mem_flags flagbuf1, flagbuf2; + if(interleaving == 1){ + flagbuf1 = CL_MEM_READ_WRITE; + flagbuf2 = CL_MEM_READ_WRITE; + } + else{ + flagbuf1 = CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA; + flagbuf2 = CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA; + } + + // Device memory buffers + cl_mem d_inData, d_outData; + d_inData = clCreateBuffer(context, flagbuf1, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_outData = clCreateBuffer(context, flagbuf2, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // Copy data from host to device + fft_time.pcie_write_t = getTimeinMilliSec(); + + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish"); + + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; + checkError(status, "Failed to copy data to device"); + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + ffta_kernel = clCreateKernel(program, "fft2da", &status); + checkError(status, "Failed to create fft2da kernel"); + + fftb_kernel = clCreateKernel(program, "fft2db", &status); + checkError(status, "Failed to create fft2db kernel"); + + fetch_kernel = clCreateKernel(program, "fetchBitrev", &status); + checkError(status, "Failed to create fetch kernel"); + + transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose1 kernel"); + + store_kernel = clCreateKernel(program, "transposeStore", &status); + checkError(status, "Failed to create store kernel"); + + status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + checkError(status, "Failed to set fetch kernel arg 0"); + + status = clSetKernelArg(fetch_kernel, 1, sizeof(cl_int), (void *)&how_many); + 
checkError(status, "Failed to set fetch kernel arg 1"); + + status = clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg 0"); + + status = clSetKernelArg(ffta_kernel, 1, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set ffta kernel arg 1"); + + status = clSetKernelArg(transpose_kernel, 0, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set transpose kernel arg 0"); + + status = clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg 0"); + + status = clSetKernelArg(fftb_kernel, 1, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set fftb kernel arg 1"); + + status = clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set store kernel arg"); + + status = clSetKernelArg(store_kernel, 1, sizeof(cl_int), (void *)&how_many); + checkError(status, "Failed to set store kernel arg"); + + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose1 kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch store kernel"); + + // Wait for all command queues to complete pending events + status = clFinish(queue1); + checkError(status, "failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "failed to finish queue2"); + status = clFinish(queue3); + checkError(status, "failed to finish queue3"); + status = 
clFinish(queue4); + checkError(status, "failed to finish queue4"); + status = clFinish(queue5); + checkError(status, "failed to finish queue5"); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + // Copy results from device to host + fft_time.pcie_read_t = getTimeinMilliSec(); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; + checkError(status, "Failed to copy data from device"); + + queue_cleanup(); + + if (d_inData) + clReleaseMemObject(d_inData); + if (d_outData) + clReleaseMemObject(d_outData); + + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + + if(store_kernel) + clReleaseKernel(store_kernel); + + fft_time.valid = 1; + return fft_time; +} + +/** + * \brief compute an out-of-place single precision complex 2DFFT using the BRAM of the FPGA and Shared Virtual Memory for Host to Device Communication + * \param N : integer pointer to size of FFT2d + * \param inp : float2 pointer to input data of size [N * N] + * \param out : float2 pointer to output data of size [N * N] + * \param inv : int toggle to activate backward FFT + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv, int how_many){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = how_many * N * N; + + cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; + cl_kernel fetch_kernel = NULL, store_kernel = NULL; + cl_kernel transpose_kernel = NULL; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (!svm_enabled)){ + return fft_time; + } + 
+#ifdef VERBOSE + printf("Launching%s 2d FFT transform in BRAM using SVM\n", inv ? " inverse":""); +#endif + + queue_setup(); + + // allocate SVM buffers + float2 *h_inData, *h_outData; + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_inData[i].x = inp[i].x; + h_inData[i].y = inp[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_outData[i].x = 0.0; + h_outData[i].y = 0.0; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + ffta_kernel = clCreateKernel(program, "fft2da", &status); + checkError(status, "Failed to create fft2da kernel"); + + fftb_kernel = clCreateKernel(program, "fft2db", &status); + checkError(status, "Failed to create fft2db kernel"); + + fetch_kernel = clCreateKernel(program, "fetchBitrev", &status); + checkError(status, "Failed to create fetch kernel"); + + transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose1 kernel"); + + store_kernel = clCreateKernel(program, "transposeStore", &status); + checkError(status, "Failed to create store kernel"); + + // write to fetch kernel using SVM based PCIe + status = 
clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData); + checkError(status, "Failed to set fetch1 kernel arg"); + + status = clSetKernelArg(fetch_kernel, 1, sizeof(cl_int), (void *)&how_many); + checkError(status, "Failed to set fetch kernel arg 1"); + + status = clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg 0"); + + status = clSetKernelArg(ffta_kernel, 1, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set ffta kernel arg 1"); + + status = clSetKernelArg(transpose_kernel, 0, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set transpose kernel arg 0"); + + status = clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg 0"); + + status = clSetKernelArg(fftb_kernel, 1, sizeof(cl_int), (void*)&how_many); + checkError(status, "Failed to set fftb kernel arg 1"); + + // kernel stores using SVM based PCIe to host + status = clSetKernelArgSVMPointer(store_kernel, 0, (void*)h_outData); + checkError(status, "Failed to set store2 kernel arg"); + + status = clSetKernelArg(store_kernel, 1, sizeof(cl_int), (void *)&how_many); + checkError(status, "Failed to set store kernel arg"); + + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose1 kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch store kernel"); + + // Wait for all command queues to complete pending events + status = 
clFinish(queue5); + checkError(status, "failed to finish queue5"); + status = clFinish(queue1); + checkError(status, "failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "failed to finish queue2"); + status = clFinish(queue3); + checkError(status, "failed to finish queue3"); + status = clFinish(queue4); + checkError(status, "failed to finish queue4"); + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + for(size_t i = 0; i < num_pts; i++){ + out[i].x = h_outData[i].x; + out[i].y = h_outData[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); + + queue_cleanup(); + + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + if(store_kernel) + clReleaseKernel(store_kernel); + + fft_time.valid = 1; + return fft_time; +} diff --git a/api/src/fft3d.c b/api/src/fft3d.c new file mode 100644 index 0000000..e5e8d51 --- /dev/null +++ b/api/src/fft3d.c @@ -0,0 +1,1352 @@ +// Author: Arjun Ramaswami + +#include +#include +#include +#include +#include +#define CL_VERSION_2_0 +#include // to disable interleaving & transfer data to specific banks - CL_CHANNEL_1_INTELFPGA +#include "CL/opencl.h" + +#include "fpga_state.h" +#include "fftfpga/fftfpga.h" +#include "svm.h" +#include "opencl_utils.h" +#include "misc.h" + + +/** + * \brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA + * \param N : integer pointer addressing the size of FFT3d + * \param inp : float2 pointer to input data of size [N * N * N] + * \param 
out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param interleaving : 1 if using burst interleaved global memory buffers + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + + cl_kernel fft3da_kernel = NULL, fft3db_kernel = NULL, fft3dc_kernel = NULL; + cl_kernel fetch_kernel = NULL, store_kernel = NULL; + cl_kernel transpose_kernel = NULL, transpose3d_kernel = NULL; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 3d FFT transform \n", inv ? " inverse":""); +#endif + + queue_setup(); + + cl_mem_flags flagbuf1, flagbuf2; + if(interleaving){ + flagbuf1 = CL_MEM_READ_WRITE; + flagbuf2 = CL_MEM_READ_WRITE; + } + else{ + flagbuf1 = CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA; + flagbuf2 = CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA; + } + + // Device memory buffers + cl_mem d_inData, d_outData; + d_inData = clCreateBuffer(context, flagbuf1, sizeof(float2) * N * N * N, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + d_outData = clCreateBuffer(context, flagbuf2, sizeof(float2) * N * N * N, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // Copy data from host to device + fft_time.pcie_write_t = getTimeinMilliSec(); + + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N * N, inp, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish"); + + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; + checkError(status, "Failed to copy data to device"); + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Create the kernel - name passed in 
here must match kernel name in the + // original CL file, that was compiled into an AOCX file using the AOC tool + fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); + + fft3da_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + + transpose_kernel = clCreateKernel(program, "transpose2d", &status); + checkError(status, "Failed to create transpose kernel"); + + fft3db_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + + transpose3d_kernel = clCreateKernel(program, "transpose3D", &status); + checkError(status, "Failed to create transpose3D kernel"); + + fft3dc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + + store_kernel = clCreateKernel(program, "store", &status); + checkError(status, "Failed to create store kernel"); + + status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + checkError(status, "Failed to set fetch kernel arg 0"); + status = clSetKernelArg(fft3da_kernel, 0, sizeof(cl_int),(void*)&inverse_int); + checkError(status, "Failed to set fft3da kernel arg 0"); + status = clSetKernelArg(fft3db_kernel, 0, sizeof(cl_int),(void*)&inverse_int); + checkError(status, "Failed to set fft3db_kernel arg 0"); + status = clSetKernelArg(fft3dc_kernel, 0, sizeof(cl_int),(void*)&inverse_int); + checkError(status, "Failed to set fft3dc_kernel arg 0"); + status = clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set store kernel arg 0"); + + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, fft3da_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, 
transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fft3db_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, transpose3d_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clEnqueueTask(queue6, fft3dc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch third fft kernel"); + + status = clEnqueueTask(queue7, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch store transpose kernel"); + + // Wait for all command queues to complete pending events + status = clFinish(queue1); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue7); + checkError(status, "failed to finish"); + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + // Copy results from device to host + fft_time.pcie_read_t = getTimeinMilliSec(); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N * N, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading buffer using PCIe"); + + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; + checkError(status, "Failed to copy data from device"); + + queue_cleanup(); + + if (d_inData) + clReleaseMemObject(d_inData); + if (d_outData) + clReleaseMemObject(d_outData); + + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(fft3da_kernel) + clReleaseKernel(fft3da_kernel); + if(fft3db_kernel) + clReleaseKernel(fft3db_kernel); + if(fft3dc_kernel) + clReleaseKernel(fft3dc_kernel); + 
if(transpose_kernel) + clReleaseKernel(transpose_kernel); + if(transpose3d_kernel) + clReleaseKernel(transpose3d_kernel); + if(store_kernel) + clReleaseKernel(store_kernel); + + fft_time.valid = 1; + return fft_time; +} + +/** + * \brief compute an out-of-place single precision complex 3D FFT using the DDR for 3D Transpose where the data access between the host and the FPGA is using Shared Virtual Memory (SVM) + * \param N : integer pointer addressing the size of FFT3d + * \param inp : float2 pointer to input data of size [N * N * N] + * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param interleaving : 1 if using burst interleaved global memory buffers + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = N * N * N; + + /* + const char* board_name; + int *bytes; + aocl_mmd_offline_info_t info_id; + info_id = AOCL_MMD_BOARD_NAMES; + aocl_mmd_get_offline_info(info_id, sizeof(char*), &board_name, size_t(int)); + + svm_handle = aocl_mmd_open(board_name); + if(svm_handle < 0 ){ + return NULL; + } + return aocl_mmd_shared_mem_alloc(svm_handle, sz, inData, device_ptr); + */ + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); +#endif + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Setup kernels + cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); + checkError(status, "Failed to create fetch1 kernel"); + cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose kernel"); + cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); + checkError(status, "Failed to create store1 kernel"); + + cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); + checkError(status, "Failed to create fetch2 kernel"); + cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); + checkError(status, "Failed to create store2 kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // Device memory buffers + cl_mem d_inOutData; + d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // allocate SVM buffers + float2 *h_inData, *h_outData; + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_inData[i].x = inp[i].x; + 
h_inData[i].y = inp[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_outData[i].x = 0.0; + h_outData[i].y = 0.0; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + + // kernel stores to DDR memory + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); + checkError(status, "Failed to set store1 kernel arg"); + + // kernel fetches from DDR memory + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + + // kernel stores using SVM based PCIe to host + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void*)h_outData); + checkError(status, "Failed to set store2 kernel arg"); + + fft_time.exec_t = getTimeinMilliSec(); + double first_half = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft 
kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clFinish(queue1); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + first_half = getTimeinMilliSec() - first_half; + + double second_half = getTimeinMilliSec(); + // enqueue fetch to same queue as the store kernel due to data dependency + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + /* + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + */ + + second_half = getTimeinMilliSec() - second_half; + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + printf("First half: %lf Second half: %lf\n\n", first_half, second_half); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map 
out data"); + + for(size_t i = 0; i < num_pts; i++){ + out[i].x = h_outData[i].x; + out[i].y = h_outData[i].y; + } + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); + + queue_cleanup(); + + if (d_inOutData) + clReleaseMemObject(d_inOutData); + + if(fetch1_kernel) + clReleaseKernel(fetch1_kernel); + if(fetch2_kernel) + clReleaseKernel(fetch2_kernel); + + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(fftc_kernel) + clReleaseKernel(fftc_kernel); + + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + + if(store1_kernel) + clReleaseKernel(store1_kernel); + if(store2_kernel) + clReleaseKernel(store2_kernel); + + fft_time.valid = 1; + return fft_time; +} + +/** + * \brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose + * \param N : integer pointer addressing the size of FFT3d + * \param inp : float2 pointer to input data of size [N * N * N] + * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param interleaving : 1 if using burst interleaved global memory buffers + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = N * N * N; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); +#endif + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Setup kernels + cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); + checkError(status, "Failed to create fetch1 kernel"); + cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose kernel"); + cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); + checkError(status, "Failed to create store1 kernel"); + + cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); + checkError(status, "Failed to create fetch2 kernel"); + cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); + checkError(status, "Failed to create store2 kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // Device memory buffers + cl_mem d_inData, d_transpose, d_outData; + d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_transpose = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // Copy data from host to device + fft_time.pcie_write_t = getTimeinMilliSec(); + + status = 
clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish"); + + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; + checkError(status, "Failed to copy data to device"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + checkError(status, "Failed to set store2 kernel arg"); + + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + // enqueue fetch to same queue as the store kernel due to data dependency + 
status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + // Copy results from device to host + fft_time.pcie_read_t = getTimeinMilliSec(); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); + + status = clFinish(queue1); + checkError(status, "failed to finish reading DDR using PCIe"); + + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; + checkError(status, "Failed to copy data from device"); + + queue_cleanup(); + + if (d_inData) + clReleaseMemObject(d_inData); + if (d_outData) + clReleaseMemObject(d_outData); + if (d_transpose) + clReleaseMemObject(d_transpose); + + if(fetch1_kernel) + clReleaseKernel(fetch1_kernel); + if(fetch2_kernel) + clReleaseKernel(fetch2_kernel); + + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(fftc_kernel) + clReleaseKernel(fftc_kernel); + + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + + if(store1_kernel) + clReleaseKernel(store1_kernel); + if(store2_kernel) + clReleaseKernel(store2_kernel); + + fft_time.valid = 1; + return fft_time; +} + + +/** + * \brief compute an batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose + * \param N : 
integer pointer addressing the size of FFT3d + * \param inp : float2 pointer to input data of size [N * N * N] + * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param interleaving : enable burst interleaved global memory buffers + * \param how_many : number of batched computations + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + cl_int status = 0; + int num_pts = N * N * N; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 1)){ + return fft_time; + } + +#ifdef VERBOSE + printf("Launching%s 3d FFT transform in DDR for Batched execution\n", inv ? " inverse":""); +#endif + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Setup kernels + cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); + checkError(status, "Failed to create fetch1 kernel"); + cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose kernel"); + cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); + checkError(status, "Failed to create store1 kernel"); + + cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); + checkError(status, "Failed to create fetch2 kernel"); + cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", 
&status); + checkError(status, "Failed to create store2 kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // Device memory buffers: using 1st and 2nd banks + // Double Buffers, using 3rd and 4th banks + // a and b are double buffers + cl_mem d_inData1, d_inData2, d_inData3, d_inData4; + cl_mem d_outData1, d_outData2, d_outData3, d_outData4; + + d_inData1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_inData2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_inData3 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_inData4 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_4_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_outData1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_outData2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_outData3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_outData4 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_4_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + cl_mem d_transpose1, d_transpose2, d_transpose3, d_transpose4; + d_transpose1 = 
clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_transpose2 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_transpose3 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_transpose4 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_4_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // Default Kernel Arguments + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); + checkError(status, "Failed to set fetch1 kernel arg"); + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); + checkError(status, "Failed to set store2 kernel arg"); + + fft_time.exec_t = getTimeinMilliSec(); + + // First Phase + // Write to DDR first buffer + status = clEnqueueWriteBuffer(queue1, d_inData1, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); + + status = clFinish(queue1); + 
checkError(status, "failed to finish"); + + // Second Phase + // Unblocking write to DDR second buffer from index num_pts + cl_event write_event[2]; + //status = clEnqueueWriteBuffer(queue6, d_inData2, CL_TRUE, 0, sizeof(float2) * num_pts, (void*)&inp[num_pts], 0, NULL, NULL); + status = clEnqueueWriteBuffer(queue6, d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, (void*)&inp[num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to write to DDR buffer"); + + // Compute First FFT already transferred + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + // Check finish of transfer and computations + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + + clWaitForEvents(1, 
&write_event[0]); + clReleaseEvent(write_event[0]); + + // Loop over the 3 stages + for(size_t i = 0; i < how_many-2; i++){ + + // Unblocking transfers between DDR and host + if( (i % 4) == 0){ + status = clEnqueueWriteBuffer(queue7, d_inData3, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[( (i+2) * num_pts)], 0, NULL, &write_event[1]); + checkError(status, "Failed to write to DDR buffer"); + + status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(i * num_pts)], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); + checkError(status, "Failed to set store2 kernel arg"); + } + else if( (i % 4) == 1){ + status = clEnqueueWriteBuffer(queue7, d_inData4, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[((i + 2) * num_pts)], 0, NULL, &write_event[1]); + checkError(status, "Failed to write to DDR buffer"); + + status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(i * num_pts)], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData3); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData3); + checkError(status, "Failed to set 
store2 kernel arg"); + } + else if( (i % 4) == 2){ + status = clEnqueueWriteBuffer(queue7, d_inData1, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[( (i + 2) * num_pts)], 0, NULL, &write_event[1]); + checkError(status, "Failed to write to DDR buffer"); + + status = clEnqueueReadBuffer(queue6, d_outData3, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(i * num_pts)], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData4); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose2); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose2); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData4); + checkError(status, "Failed to set store2 kernel arg"); + } + else{ + status = clEnqueueWriteBuffer(queue7, d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[( (i+2) * num_pts)], 0, NULL, &write_event[1]); + checkError(status, "Failed to write to DDR buffer"); + + status = clEnqueueReadBuffer(queue6, d_outData4, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(i * num_pts)], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); + checkError(status, "Failed to set store2 kernel arg"); + } + + // Set Kernel Arguments before execution + status = clEnqueueTask(queue1, 
fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clFinish(queue7); + checkError(status, "failed to finish"); + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + + clWaitForEvents(2, write_event); + clReleaseEvent(write_event[0]); + clReleaseEvent(write_event[1]); + } + + if( (how_many % 4) == 0){ + status = clEnqueueReadBuffer(queue6, d_outData3, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData4); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void 
*)&d_transpose2); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose2); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData4); + checkError(status, "Failed to set store2 kernel arg"); + } + else if((how_many % 4) == 1){ + status = clEnqueueReadBuffer(queue6, d_outData4, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); + checkError(status, "Failed to set store2 kernel arg"); + } + else if((how_many % 4) == 2){ + status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); + checkError(status, "Failed to set store2 kernel arg"); + } + else{ + status = 
clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData3); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); + checkError(status, "Failed to set store1 kernel arg"); + + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData3); + checkError(status, "Failed to set store2 kernel arg"); + } + + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + 
checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + + clWaitForEvents(1, &write_event[0]); + clReleaseEvent(write_event[0]); + + if( (how_many % 4) == 0){ + status = clEnqueueReadBuffer(queue6, d_outData4, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + } + else if((how_many % 4) == 1){ + status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + } + else if((how_many % 4) == 2){ + status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + } + else{ + status = clEnqueueReadBuffer(queue6, d_outData3, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[0]); + checkError(status, "Failed to read from DDR buffer"); + } + + status = clFinish(queue6); + checkError(status, "failed to finish reading DDR using PCIe"); + + clWaitForEvents(1, &write_event[0]); + clReleaseEvent(write_event[0]); + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + checkError(status, "Failed to copy data from device"); + + queue_cleanup(); + + if (d_inData1) + clReleaseMemObject(d_inData1); + if (d_inData2) + clReleaseMemObject(d_inData2); + if (d_inData3) + clReleaseMemObject(d_inData3); + if (d_inData4) + clReleaseMemObject(d_inData4); + + if (d_outData2) + clReleaseMemObject(d_outData2); + if (d_outData2) + clReleaseMemObject(d_outData2); + if (d_outData3) + clReleaseMemObject(d_outData3); + if (d_outData4) + clReleaseMemObject(d_outData4); + + if (d_transpose1) + clReleaseMemObject(d_transpose1); + if (d_transpose2) + 
clReleaseMemObject(d_transpose2); + if (d_transpose3) + clReleaseMemObject(d_transpose3); + if (d_transpose4) + clReleaseMemObject(d_transpose4); + + if(fetch1_kernel) + clReleaseKernel(fetch1_kernel); + if(fetch2_kernel) + clReleaseKernel(fetch2_kernel); + + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(fftc_kernel) + clReleaseKernel(fftc_kernel); + + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + + if(store1_kernel) + clReleaseKernel(store1_kernel); + if(store2_kernel) + clReleaseKernel(store2_kernel); + + fft_time.valid = 1; + return fft_time; +}
+
+
+/**
+ * \brief  compute a batch of out-of-place single precision complex 3D-FFTs, using the DDR of the FPGA
+ *         for the 3D transpose and Shared Virtual Memory (SVM) for the data transfers between the
+ *         host's main memory and the FPGA
+ *
+ * Every batch gets its own pair of SVM buffers: the input is copied into the SVM input buffer and
+ * the SVM output buffer is zeroed before any kernel is launched. Each batch runs in two phases:
+ *   - write phase: fetch1 -> ffta -> transpose -> fftb -> store1, ending in a DDR buffer
+ *   - read phase : fetch2 -> fftc -> store2, reading that DDR buffer and writing the SVM output
+ * Two DDR buffers are used alternately, so the read phase of batch i-1 is enqueued together with the
+ * write phase of batch i. Only the kernel execution is timed; the host-side copies in and out of the
+ * SVM buffers are not part of exec_t.
+ *
+ * \param  N        : number of points in each dimension; has to be a positive power of 2
+ * \param  inp      : float2 pointer to input data of size [how_many * N * N * N]
+ * \param  out      : float2 pointer to output data of size [how_many * N * N * N]
+ * \param  inv      : true for backward transforms
+ * \param  how_many : number of batched computations; has to be larger than 0
+ * \return fpga_t   : exec_t is the kernel execution time in milliseconds and valid is set to 1.
+ *                    All fields are 0 (valid == 0) if an argument is rejected or SVM is not enabled
+ */
+fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, bool inv, int how_many) {
+  fpga_t fft_time = {0.0, 0.0, 0.0, 0};
+  cl_int status = 0;
+
+  // reject missing buffers, empty batches and sizes that are not a positive power of 2
+  // (N == 0 passes the bit test on its own, hence the explicit N <= 0)
+  if(inp == NULL || out == NULL || N <= 0 || ( (N & (N-1)) != 0) || (how_many <= 0)){
+    return fft_time;
+  }
+
+  if(!svm_enabled){
+    return fft_time;
+  }
+
+  // points and bytes of a single batch; computed in size_t instead of int so that
+  // N * N * N is not evaluated in (overflow-prone) signed arithmetic
+  const size_t num_pts = (size_t)N * (size_t)N * (size_t)N;
+  const size_t num_bytes = sizeof(float2) * num_pts;
+
+#ifdef VERBOSE
+  printf("Launching%s 3d FFT transform in DDR \n", inv ? " inverse":"");
+#endif
+
+  // Can't pass bool to device, so convert it to int
+  int inverse_int = (int)inv;
+
+  // Setup kernels - names must match the kernel names in the device code
+  cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status);
+  checkError(status, "Failed to create fetch1 kernel");
+  cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status);
+  checkError(status, "Failed to create fft3da kernel");
+  cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status);
+  checkError(status, "Failed to create transpose kernel");
+  cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status);
+  checkError(status, "Failed to create fft3db kernel");
+  cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status);
+  checkError(status, "Failed to create store1 kernel");
+
+  cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status);
+  checkError(status, "Failed to create fetch2 kernel");
+  cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status);
+  checkError(status, "Failed to create fft3dc kernel");
+  cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status);
+  checkError(status, "Failed to create store2 kernel");
+
+  // Setup Queues to the kernels
+  queue_setup();
+
+  // Device memory buffers: double buffers, one per memory channel
+  cl_mem d_outData_0;
+  d_outData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, num_bytes, NULL, &status);
+  checkError(status, "Failed to allocate output device buffer\n");
+
+  cl_mem d_outData_1;
+  d_outData_1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, num_bytes, NULL, &status);
+  checkError(status, "Failed to allocate output device buffer\n");
+
+  // allocate and initialize one SVM input and one SVM output buffer per batch
+  float2 *h_inData[how_many], *h_outData[how_many];
+  for(int i = 0; i < how_many; i++){
+    h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, num_bytes, 0);
+    h_outData[i] = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, num_bytes, 0);
+
+    // clSVMAlloc signals failure with a NULL pointer instead of a status code
+    status = (h_inData[i] == NULL || h_outData[i] == NULL) ? CL_MEM_OBJECT_ALLOCATION_FAILURE : CL_SUCCESS;
+    checkError(status, "Failed to allocate SVM buffers");
+
+    status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData[i], num_bytes, 0, NULL, NULL);
+    checkError(status, "Failed to map input data");
+
+    // copy batch i of the input into its SVM buffer
+    const size_t stride = (size_t)i * num_pts;
+    for(size_t j = 0; j < num_pts; j++){
+      h_inData[i][j].x = inp[stride + j].x;
+      h_inData[i][j].y = inp[stride + j].y;
+    }
+
+    status = clEnqueueSVMUnmap(queue1, (void *)h_inData[i], 0, NULL, NULL);
+    checkError(status, "Failed to unmap input data");
+
+    status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData[i], num_bytes, 0, NULL, NULL);
+    checkError(status, "Failed to map output data");
+
+    // zero the SVM output buffer of batch i
+    for(size_t j = 0; j < num_pts; j++){
+      h_outData[i][j].x = 0.0f;
+      h_outData[i][j].y = 0.0f;
+    }
+
+    status = clEnqueueSVMUnmap(queue1, (void *)h_outData[i], 0, NULL, NULL);
+    checkError(status, "Failed to unmap output data");
+  }
+
+  /*
+   * kernel arguments
+   */
+  // fetch1 reads the first batch from host memory using SVM based PCIe
+  status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData[0]);
+  checkError(status, "Failed to set fetch1 kernel arg");
+
+  status = clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int);
+  checkError(status, "Failed to set ffta kernel arg");
+  status = clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int);
+  checkError(status, "Failed to set fftb kernel arg");
+
+  // store1 writes the first batch to DDR buffer 0
+  status = clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0);
+  checkError(status, "Failed to set store1 kernel arg");
+
+  /*
+   * First batch write phase
+   */
+  fft_time.exec_t = getTimeinMilliSec();
+  status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL);
+  checkError(status, "Failed to launch fetch kernel");
+
+  status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL);
+  checkError(status, "Failed to launch fft kernel");
+
+  status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL);
+  checkError(status, "Failed to launch transpose kernel");
+
+  status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL);
+  checkError(status, "Failed to launch second fft kernel");
+
+  status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL);
+  checkError(status, "Failed to launch second transpose kernel");
+
+  for(int i = 1; i < how_many; i++){
+    // Batch i-1 was stored to DDR buffer 0 through queue5 when i is odd and to DDR buffer 1
+    // through queue8 when i is even. fetch2 of batch i-1 is enqueued on that same queue,
+    // behind the store1 that filled the buffer; batch i is stored to the other buffer
+    // through the other queue.
+    const bool odd = (i % 2) == 1;
+    cl_mem *read_buf = odd ? &d_outData_0 : &d_outData_1;
+    cl_mem *write_buf = odd ? &d_outData_1 : &d_outData_0;
+    cl_command_queue read_queue = odd ? queue5 : queue8;
+    cl_command_queue write_queue = odd ? queue8 : queue5;
+
+    /*
+     * Read phase of previous iteration
+     */
+    // kernel fetches from DDR memory
+    status = clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)read_buf);
+    checkError(status, "Failed to set fetch2 kernel arg");
+
+    // Start fetch2 phase with same queue as the store1 of the previous batch
+    status = clEnqueueTask(read_queue, fetch2_kernel, 0, NULL, NULL);
+    checkError(status, "Failed to launch fetch kernel");
+
+    // kernel stores using SVM based PCIe to host
+    status = clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int);
+    checkError(status, "Failed to set fftc kernel arg");
+    status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[i-1]);
+    checkError(status, "Failed to set store2 kernel arg");
+
+    status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL);
+    checkError(status, "Failed to launch fft kernel");
+
+    status = clEnqueueTask(queue7, store2_kernel, 0, NULL, NULL);
+    checkError(status, "Failed to launch transpose kernel");
+
+    /*
+     * write phase of current iteration
+     */
+    // change write phase host and ddr ptrs
+    status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData[i]);
+    checkError(status, "Failed to set fetch1 kernel arg");
+    status = clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)write_buf);
+    checkError(status, "Failed to set store1 kernel arg");
+
+    // Start write phase of current iteration
+    status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL);
+    checkError(status, "Failed to launch fetch kernel");
+
+    status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL);
+    checkError(status, "Failed to launch fft kernel");
+
+    status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL);
+    checkError(status, "Failed to launch transpose kernel");
+
+    status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL);
+    checkError(status, "Failed to launch second fft kernel");
+
+    status = clEnqueueTask(write_queue, store1_kernel, 0, NULL, NULL);
+    checkError(status, "Failed to launch second transpose kernel");
+  }
+
+  /*
+   * Read phase of the last batch: it sits in DDR buffer 0 (queue5) for an odd number of
+   * batches and in DDR buffer 1 (queue8) for an even number
+   */
+  const bool odd_batches = (how_many % 2) == 1;
+  status = clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), odd_batches ? (void *)&d_outData_0 : (void *)&d_outData_1);
+  checkError(status, "Failed to set fetch2 kernel arg");
+
+  status = clEnqueueTask(odd_batches ? queue5 : queue8, fetch2_kernel, 0, NULL, NULL);
+  checkError(status, "Failed to launch fetch kernel");
+
+  status = clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int);
+  checkError(status, "Failed to set fftc kernel arg");
+  status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[how_many-1]);
+  checkError(status, "Failed to set store2 kernel arg");
+
+  status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL);
+  checkError(status, "Failed to launch fft kernel");
+
+  status = clEnqueueTask(queue7, store2_kernel, 0, NULL, NULL);
+  checkError(status, "Failed to launch transpose kernel");
+
+  // Wait for all command queues to complete pending events
+  status = clFinish(queue1);
+  checkError(status, "Failed to finish queue1");
+  status = clFinish(queue2);
+  checkError(status, "Failed to finish queue2");
+  status = clFinish(queue3);
+  checkError(status, "Failed to finish queue3");
+  status = clFinish(queue4);
+  checkError(status, "Failed to finish queue4");
+  status = clFinish(queue5);
+  checkError(status, "Failed to finish queue5");
+  status = clFinish(queue6);
+  checkError(status, "Failed to finish queue6");
+  status = clFinish(queue7);
+  checkError(status, "Failed to finish queue7");
+  status = clFinish(queue8);
+  checkError(status, "Failed to finish queue8");
+
+  fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t;
+
+  // copy the results of every batch from its SVM buffer to the caller's output
+  for(int i = 0; i < how_many; i++){
+
+    status = clEnqueueSVMMap(queue2, CL_TRUE, CL_MAP_READ,
+      (void *)h_outData[i], num_bytes, 0, NULL, NULL);
+    checkError(status, "Failed to map out data");
+
+    const size_t stride = (size_t)i * num_pts;
+    for(size_t j = 0; j < num_pts; j++){
+      out[stride + j].x = h_outData[i][j].x;
+      out[stride + j].y = h_outData[i][j].y;
+    }
+
+    status = clEnqueueSVMUnmap(queue2, (void *)h_outData[i], 0, NULL, NULL);
+    checkError(status, "Failed to unmap out data");
+  }
+
+  // Cleanup
+  for(int i = 0; i < how_many; i++){
+    clSVMFree(context, h_inData[i]);
+    clSVMFree(context, h_outData[i]);
+  }
+
+  queue_cleanup();
+
+  if (d_outData_0)
+    clReleaseMemObject(d_outData_0);
+  if (d_outData_1)
+    clReleaseMemObject(d_outData_1);
+
+  if(fetch1_kernel)
+    clReleaseKernel(fetch1_kernel);
+  if(fetch2_kernel)
+    clReleaseKernel(fetch2_kernel);
+
+  if(ffta_kernel)
+    clReleaseKernel(ffta_kernel);
+  if(fftb_kernel)
+    clReleaseKernel(fftb_kernel);
+  if(fftc_kernel)
+    clReleaseKernel(fftc_kernel);
+
+  if(transpose_kernel)
+    clReleaseKernel(transpose_kernel);
+
+  if(store1_kernel)
+    clReleaseKernel(store1_kernel);
+  if(store2_kernel)
+    clReleaseKernel(store2_kernel);
+
+  fft_time.valid = 1;
+  return fft_time;
+}
diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index b27a82a..95f3959 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -9,28 +9,22 @@ #include // to disable interleaving & transfer data to specific banks - CL_CHANNEL_1_INTELFPGA #include "CL/opencl.h" +#include "fpga_state.h" #include "fftfpga/fftfpga.h" #include "svm.h" #include "opencl_utils.h" #include "misc.h" -#ifndef KERNEL_VARS -#define KERNEL_VARS -static cl_platform_id platform = NULL; -static cl_device_id *devices; -static cl_device_id device = NULL; -static cl_context context = NULL; -static cl_program program = NULL; -static cl_command_queue queue1 = NULL, queue2 = NULL, queue3 = NULL; -static cl_command_queue queue4 = NULL, queue5 = NULL, queue6 = NULL; -static cl_command_queue queue7 = NULL, queue8 = NULL; - +cl_platform_id platform = NULL; +cl_device_id *devices; +cl_device_id device = NULL; +cl_context context = NULL; +cl_program program = NULL; +cl_command_queue queue1 = NULL, queue2 = NULL, queue3 = NULL; +cl_command_queue queue4 = NULL, queue5 = NULL, queue6 = NULL; +cl_command_queue queue7 = NULL, queue8 = NULL; //static int svm_handle; -static bool svm_enabled = false; -#endif - -static void queue_setup(); -void queue_cleanup(); +bool svm_enabled = false; /** * @brief Allocate memory of double precision complex floating points @@ -155,1383 +149,49 @@ void fpga_final(){ } /** - * \brief compute an out-of-place double precision complex 1D-FFT on the FPGA - * \param N : integer pointer to size of FFT3d - * \param inp : double2 pointer to input data of size N - * \param out : double2 pointer to output data of size N - * \param inv : int toggle to activate backward FFT - * \param batch : number of batched executions of 1D FFT - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ - fpga_t fft_time =
{0.0, 0.0, 0.0, 0}; - cl_kernel fetch_kernel = NULL, fft_kernel = NULL; - cl_int status = 0; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ((N & (N-1)) !=0)){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s FFT transform of %d batches \n", inv ? " inverse":"", batch); -#endif - - queue_setup(); - - cl_mem d_inData, d_outData; - d_inData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(double2) * N * batch, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(double2) * N * batch, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); - - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(double2) * N * batch, inp, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish writing buffer using PCIe"); - - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Create Kernels - names must match the kernel name in the original CL file - fetch_kernel = clCreateKernel(program, "fetch", &status); - checkError(status, "Failed to create fetch kernel"); - - fft_kernel = clCreateKernel(program, "fft1d", &status); - checkError(status, "Failed to create fft1d kernel"); - // Set the kernel arguments - // from here - status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set fetch_kernel arg 0"); - status = clSetKernelArg(fft_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set fft_kernel arg 0"); - status = clSetKernelArg(fft_kernel, 1, sizeof(cl_int), (void*)&batch); - checkError(status, "Failed to set fft_kernel arg 1"); - 
status = clSetKernelArg(fft_kernel, 2, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fft_kernel arg 2"); - - printf(inverse_int ? "\tInverse FFT" : "\tFFT"); - printf(" kernel initialization is complete.\n"); - - size_t ls = N/8; - size_t gs = batch * ls; - - // Measure execution time - fft_time.exec_t = getTimeinMilliSec(); - - // FFT1d kernel is the SWI kernel - status = clEnqueueTask(queue1, fft_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft1d kernel"); - - status = clEnqueueNDRangeKernel(queue2, fetch_kernel, 1, NULL, &gs, &ls, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - // Wait for command queue to complete pending events - status = clFinish(queue1); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue2); - checkError(status, "Failed to finish queue2"); - - // Record execution time - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - // Copy results from device to host - fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish reading buffer using PCIe"); - - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - - // Cleanup - if (d_inData) - clReleaseMemObject(d_inData); - if (d_outData) - clReleaseMemObject(d_outData); - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - if(fft_kernel) - clReleaseKernel(fft_kernel); - queue_cleanup(); - fft_time.valid = 1; - return fft_time; -} - -/** - * \brief compute an out-of-place single precision complex 1D-FFT on the FPGA - * \param N : integer pointer to size of FFT3d - * \param inp : float2 pointer to input data of size N - * \param out : float2 pointer to output data of size N - * \param inv : true for backward transforms - * \param batch : number of batched executions 
of 1D FFT - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int batch){ - - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_kernel kernel1 = NULL, kernel2 = NULL; - cl_int status = 0; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s FFT transform for %d batch \n", inv ? " inverse":"", batch); -#endif - - queue_setup(); - - cl_mem d_inData, d_outData; - printf("Launching%s FFT transform for %d batch \n", inv ? " inverse":"", batch); - - // Create device buffers - assign the buffers in different banks for more efficient memory access - d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float2) * N * batch, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * batch, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); - - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * batch, inp, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish writing buffer using PCIe"); - - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Create Kernels - names must match the kernel name in the original CL file - kernel1 = clCreateKernel(program, "fetch", &status); - checkError(status, "Failed to create fetch kernel"); - - kernel2 = clCreateKernel(program, "fft1d", &status); - checkError(status, "Failed to create fft1d kernel"); - // Set the kernel arguments - status = clSetKernelArg(kernel1, 0, sizeof(cl_mem), (void 
*)&d_inData); - checkError(status, "Failed to set kernel1 arg 0"); - status = clSetKernelArg(kernel2, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set kernel arg 0"); - status = clSetKernelArg(kernel2, 1, sizeof(cl_int), (void*)&batch); - checkError(status, "Failed to set kernel arg 1"); - status = clSetKernelArg(kernel2, 2, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set kernel arg 2"); - - printf(inverse_int ? "\tInverse FFT" : "\tFFT"); - printf(" kernel initialization is complete.\n"); - - size_t ls = N/8; - size_t gs = batch * ls; - - // Measure execution time - fft_time.exec_t = getTimeinMilliSec(); - - // Launch the kernel - we launch a single work item hence enqueue a task - // FFT1d kernel is the SWI kernel - status = clEnqueueTask(queue1, kernel2, 0, NULL, NULL); - checkError(status, "Failed to launch fft1d kernel"); - - status = clEnqueueNDRangeKernel(queue2, kernel1, 1, NULL, &gs, &ls, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - // Wait for command queue to complete pending events - status = clFinish(queue1); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue2); - checkError(status, "Failed to finish queue2"); - - // Record execution time - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - // Copy results from device to host - fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish reading buffer using PCIe"); - - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - - // Cleanup - if (d_inData) - clReleaseMemObject(d_inData); - if (d_outData) - clReleaseMemObject(d_outData); - if(kernel1) - clReleaseKernel(kernel1); - if(kernel2) - clReleaseKernel(kernel2); - queue_cleanup(); - - fft_time.valid = 1; 
- return fft_time; -} - -/** - * \brief compute an out-of-place single precision complex 1D-FFT on the FPGA using Shared Virtual Memory for data transfers between host's main memory and FPGA - * \param N : integer pointer to size of FFT3d - * \param inp : float2 pointer to input data of size N - * \param out : float2 pointer to output data of size N - * \param inv : int toggle to activate backward FFT - * \param batch : number of batched executions of 1D FFT - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_int status = 0; - int num_pts = N * batch; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s 1D FFT transform in DDR \n", inv ? " inverse":""); -#endif - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Setup kernels - cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); - checkError(status, "Failed to create fetch1 kernel"); - cl_kernel fft_kernel = clCreateKernel(program, "fft1d", &status); - checkError(status, "Failed to create fft3da kernel"); - - // Setup Queues to the kernels - queue_setup(); - - // allocate SVM buffers - float2 *h_inData, *h_outData; - h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); - h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_inData[i].x = inp[i].x; - h_inData[i].y = inp[i].y; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); - checkError(status, 
"Failed to unmap input data"); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_outData[i].x = 0.0; - h_outData[i].y = 0.0; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData); - checkError(status, "Failed to set fetch kernel arg"); - - // kernel transforms and stores to DDR memory - status = clSetKernelArgSVMPointer(fft_kernel, 0, (void *)h_outData); - checkError(status, "Failed to set store2 kernel arg"); - - status=clSetKernelArg(fft_kernel, 1, sizeof(cl_int), (void*)&batch); - checkError(status, "Failed to set fft kernel arg"); - - status=clSetKernelArg(fft_kernel, 2, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fft kernel arg"); - - size_t ls = N/8; - size_t gs = batch * ls; - - fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fft_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueNDRangeKernel(queue2, fetch_kernel, 1, NULL, &gs, &ls, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue1); - checkError(status, "failed to finish"); - - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, - (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map out data"); - - for(size_t i = 0; i < num_pts; i++){ - out[i].x = h_outData[i].x; - out[i].y = h_outData[i].y; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, 
"Failed to unmap out data"); - - if (h_inData) - clSVMFree(context, h_inData); - if (h_outData) - clSVMFree(context, h_outData); - - queue_cleanup(); - - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - - if(fft_kernel) - clReleaseKernel(fft_kernel); - - fft_time.valid = 1; - return fft_time; -} - -/** - * \brief compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA - * \param N : integer pointer to size of FFT2d - * \param inp : float2 pointer to input data of size [N * N] - * \param out : float2 pointer to output data of size [N * N] - * \param inv : int toggle to activate backward FFT - * \param iter : int toggle to activate backward FFT - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_kernel fetch_kernel = NULL, fft_kernel = NULL, transpose_kernel = NULL; - cl_int status = 0; - int mangle_int = 0; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s 2d FFT transform \n", inv ? 
" inverse":""); -#endif - - queue_setup(); - - cl_mem d_inData, d_outData, d_tmp; - - d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float2) * N * N, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float2) * N * N, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - d_tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * N, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); - - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N, inp, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish writing buffer using PCIe"); - - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Create Kernels - names must match the kernel name in the original CL file - fft_kernel = clCreateKernel(program, "fft2d", &status); - checkError(status, "Failed to create kernel"); - fetch_kernel = clCreateKernel(program, "fetch", &status); - checkError(status, "Failed to create kernel"); - transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create kernel"); - - // Record execution time - fft_time.exec_t = getTimeinMilliSec(); - - // Loop twice over the kernels - for (size_t i = 0; i < 2; i++) { - - // Set the kernel arguments - status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), i == 0 ? 
(void *)&d_inData : (void *)&d_tmp); - checkError(status, "Failed to set kernel arg 0"); - status = clSetKernelArg(fetch_kernel, 1, sizeof(cl_int), (void*)&mangle_int); - checkError(status, "Failed to set kernel arg 1"); - size_t lws_fetch[] = {N}; - size_t gws_fetch[] = {N * N / 8}; - status = clEnqueueNDRangeKernel(queue1, fetch_kernel, 1, 0, gws_fetch, lws_fetch, 0, NULL, NULL); - checkError(status, "Failed to launch kernel"); - - // Launch the fft kernel - we launch a single work item hence enqueue a task - status = clSetKernelArg(fft_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set kernel arg 0"); - status = clEnqueueTask(queue2, fft_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch kernel"); - - // Set the kernel arguments - status = clSetKernelArg(transpose_kernel, 0, sizeof(cl_mem), i == 0 ? (void *)&d_tmp : (void *)&d_outData); - checkError(status, "Failed to set kernel arg 0"); - - status = clSetKernelArg(transpose_kernel, 1, sizeof(cl_int), (void*)&mangle_int); - checkError(status, "Failed to set kernel arg 1"); - - size_t lws_transpose[] = {N}; - size_t gws_transpose[] = {N * N / 8}; - status = clEnqueueNDRangeKernel(queue3, transpose_kernel, 1, 0, gws_transpose, lws_transpose, 0, NULL, NULL); - checkError(status, "Failed to launch kernel"); - - // Wait for all command queues to complete pending events - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - } - - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - // Copy results from device to host - fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N, out, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish reading buffer using PCIe"); - - fft_time.pcie_read_t = 
getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - - // Cleanup - if (d_inData) - clReleaseMemObject(d_inData); - if (d_outData) - clReleaseMemObject(d_outData); - if (d_tmp) - clReleaseMemObject(d_tmp); - if(fft_kernel) - clReleaseKernel(fft_kernel); - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - queue_cleanup(); - - fft_time.valid = 1; - return fft_time; -} - -/** - * \brief compute an out-of-place single precision complex 2D-FFT using the BRAM of the FPGA - * \param N : integer pointer to size of FFT2d - * \param inp : float2 pointer to input data of size [N * N] - * \param out : float2 pointer to output data of size [N * N] - * \param inv : int toggle to activate backward FFT - * \param interleaving : enable interleaved global memory buffers - * \return fpga_t : time taken in milliseconds for data transfers and execution + * \brief Create a command queue for each kernel */ -fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; - cl_kernel fetch_kernel = NULL, store_kernel = NULL; - cl_kernel transpose_kernel = NULL; - +void queue_setup(){ cl_int status = 0; - int num_pts = how_many * N * N; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); -#endif - - queue_setup(); - - cl_mem_flags flagbuf1, flagbuf2; - if(interleaving == 1){ - flagbuf1 = CL_MEM_READ_WRITE; - flagbuf2 = CL_MEM_READ_WRITE; - } - else{ - flagbuf1 = CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA; - flagbuf2 = CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA; - } - - // Device memory buffers - cl_mem d_inData, d_outData; - d_inData = clCreateBuffer(context, flagbuf1, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_outData = clCreateBuffer(context, flagbuf2, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); - - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish"); - - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - ffta_kernel = clCreateKernel(program, "fft2da", &status); - checkError(status, "Failed to create fft2da kernel"); - - fftb_kernel = clCreateKernel(program, "fft2db", &status); - checkError(status, "Failed to create fft2db kernel"); - - fetch_kernel = clCreateKernel(program, "fetchBitrev", &status); - checkError(status, "Failed to create fetch kernel"); - - transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose1 kernel"); - - store_kernel = clCreateKernel(program, "transposeStore", &status); - checkError(status, "Failed to create store kernel"); - - status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set fetch kernel arg 0"); - - status = clSetKernelArg(fetch_kernel, 1, sizeof(cl_int), (void *)&how_many); - 
checkError(status, "Failed to set fetch kernel arg 1"); - - status = clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg 0"); - - status = clSetKernelArg(ffta_kernel, 1, sizeof(cl_int), (void*)&how_many); - checkError(status, "Failed to set ffta kernel arg 1"); - - status = clSetKernelArg(transpose_kernel, 0, sizeof(cl_int), (void*)&how_many); - checkError(status, "Failed to set transpose kernel arg 0"); - - status = clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg 0"); - - status = clSetKernelArg(fftb_kernel, 1, sizeof(cl_int), (void*)&how_many); - checkError(status, "Failed to set fftb kernel arg 1"); - - status = clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set store kernel arg"); - - status = clSetKernelArg(store_kernel, 1, sizeof(cl_int), (void *)&how_many); - checkError(status, "Failed to set store kernel arg"); - - fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose1 kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, store_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch store kernel"); - - // Wait for all command queues to complete pending events - status = clFinish(queue1); - checkError(status, "failed to finish queue1"); - status = clFinish(queue2); - checkError(status, "failed to finish queue2"); - status = clFinish(queue3); - checkError(status, "failed to finish queue3"); - status = 
clFinish(queue4); - checkError(status, "failed to finish queue4"); - status = clFinish(queue5); - checkError(status, "failed to finish queue5"); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - // Copy results from device to host - fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish reading buffer using PCIe"); - - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - - queue_cleanup(); - - if (d_inData) - clReleaseMemObject(d_inData); - if (d_outData) - clReleaseMemObject(d_outData); - - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - - if(ffta_kernel) - clReleaseKernel(ffta_kernel); - if(fftb_kernel) - clReleaseKernel(fftb_kernel); - - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - - if(store_kernel) - clReleaseKernel(store_kernel); - - fft_time.valid = 1; - return fft_time; -} - -/** - * \brief compute an out-of-place single precision complex 2DFFT using the BRAM of the FPGA and Shared Virtual Memory for Host to Device Communication - * \param N : integer pointer to size of FFT2d - * \param inp : float2 pointer to input data of size [N * N] - * \param out : float2 pointer to output data of size [N * N] - * \param inv : int toggle to activate backward FFT - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv, int how_many){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_int status = 0; - int num_pts = how_many * N * N; - - cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; - cl_kernel fetch_kernel = NULL, store_kernel = NULL; - cl_kernel transpose_kernel = NULL; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (!svm_enabled)){ - return fft_time; - } - 
-#ifdef VERBOSE - printf("Launching%s 2d FFT transform in BRAM using SVM\n", inv ? " inverse":""); -#endif - - queue_setup(); - - // allocate SVM buffers - float2 *h_inData, *h_outData; - h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); - h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_inData[i].x = inp[i].x; - h_inData[i].y = inp[i].y; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_outData[i].x = 0.0; - h_outData[i].y = 0.0; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - ffta_kernel = clCreateKernel(program, "fft2da", &status); - checkError(status, "Failed to create fft2da kernel"); - - fftb_kernel = clCreateKernel(program, "fft2db", &status); - checkError(status, "Failed to create fft2db kernel"); - - fetch_kernel = clCreateKernel(program, "fetchBitrev", &status); - checkError(status, "Failed to create fetch kernel"); - - transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose1 kernel"); - - store_kernel = clCreateKernel(program, "transposeStore", &status); - checkError(status, "Failed to create store kernel"); - - // write to fetch kernel using SVM based PCIe - status = 
clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData); - checkError(status, "Failed to set fetch1 kernel arg"); - - status = clSetKernelArg(fetch_kernel, 1, sizeof(cl_int), (void *)&how_many); - checkError(status, "Failed to set fetch kernel arg 1"); - - status = clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg 0"); - - status = clSetKernelArg(ffta_kernel, 1, sizeof(cl_int), (void*)&how_many); - checkError(status, "Failed to set ffta kernel arg 1"); - - status = clSetKernelArg(transpose_kernel, 0, sizeof(cl_int), (void*)&how_many); - checkError(status, "Failed to set transpose kernel arg 0"); - - status = clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg 0"); - - status = clSetKernelArg(fftb_kernel, 1, sizeof(cl_int), (void*)&how_many); - checkError(status, "Failed to set fftb kernel arg 1"); - - // kernel stores using SVM based PCIe to host - status = clSetKernelArgSVMPointer(store_kernel, 0, (void*)h_outData); - checkError(status, "Failed to set store2 kernel arg"); - - status = clSetKernelArg(store_kernel, 1, sizeof(cl_int), (void *)&how_many); - checkError(status, "Failed to set store kernel arg"); - - fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose1 kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, store_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch store kernel"); - - // Wait for all command queues to complete pending events - status = 
clFinish(queue5); - checkError(status, "failed to finish queue5"); - status = clFinish(queue1); - checkError(status, "failed to finish queue1"); - status = clFinish(queue2); - checkError(status, "failed to finish queue2"); - status = clFinish(queue3); - checkError(status, "failed to finish queue3"); - status = clFinish(queue4); - checkError(status, "failed to finish queue4"); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, - (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map out data"); - - for(size_t i = 0; i < num_pts; i++){ - out[i].x = h_outData[i].x; - out[i].y = h_outData[i].y; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap out data"); - - if (h_inData) - clSVMFree(context, h_inData); - if (h_outData) - clSVMFree(context, h_outData); - - queue_cleanup(); - - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - if(ffta_kernel) - clReleaseKernel(ffta_kernel); - if(fftb_kernel) - clReleaseKernel(fftb_kernel); - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - if(store_kernel) - clReleaseKernel(store_kernel); - - fft_time.valid = 1; - return fft_time; -} - -/** - * \brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA - * \param N : integer pointer addressing the size of FFT3d - * \param inp : float2 pointer to input data of size [N * N * N] - * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param interleaving : 1 if using burst interleaved global memory buffers - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_int status = 0; - - cl_kernel fft3da_kernel = NULL, fft3db_kernel = NULL, 
fft3dc_kernel = NULL; - cl_kernel fetch_kernel = NULL, store_kernel = NULL; - cl_kernel transpose_kernel = NULL, transpose3d_kernel = NULL; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s 3d FFT transform \n", inv ? " inverse":""); -#endif - - queue_setup(); - - cl_mem_flags flagbuf1, flagbuf2; - if(interleaving){ - flagbuf1 = CL_MEM_READ_WRITE; - flagbuf2 = CL_MEM_READ_WRITE; - } - else{ - flagbuf1 = CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA; - flagbuf2 = CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA; - } - - // Device memory buffers - cl_mem d_inData, d_outData; - d_inData = clCreateBuffer(context, flagbuf1, sizeof(float2) * N * N * N, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - d_outData = clCreateBuffer(context, flagbuf2, sizeof(float2) * N * N * N, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); - - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N * N, inp, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish"); - - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Create the kernel - name passed in here must match kernel name in the - // original CL file, that was compiled into an AOCX file using the AOC tool - fetch_kernel = clCreateKernel(program, "fetch", &status); - checkError(status, "Failed to create fetch kernel"); - - fft3da_kernel = clCreateKernel(program, "fft3da", &status); - checkError(status, "Failed to create fft3da kernel"); - - transpose_kernel = clCreateKernel(program, "transpose2d", &status); - checkError(status, "Failed to create transpose kernel"); - - 
fft3db_kernel = clCreateKernel(program, "fft3db", &status); - checkError(status, "Failed to create fft3db kernel"); - - transpose3d_kernel = clCreateKernel(program, "transpose3D", &status); - checkError(status, "Failed to create transpose3D kernel"); - - fft3dc_kernel = clCreateKernel(program, "fft3dc", &status); - checkError(status, "Failed to create fft3dc kernel"); - - store_kernel = clCreateKernel(program, "store", &status); - checkError(status, "Failed to create store kernel"); - - status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set fetch kernel arg 0"); - status = clSetKernelArg(fft3da_kernel, 0, sizeof(cl_int),(void*)&inverse_int); - checkError(status, "Failed to set fft3da kernel arg 0"); - status = clSetKernelArg(fft3db_kernel, 0, sizeof(cl_int),(void*)&inverse_int); - checkError(status, "Failed to set fft3db_kernel arg 0"); - status = clSetKernelArg(fft3dc_kernel, 0, sizeof(cl_int),(void*)&inverse_int); - checkError(status, "Failed to set fft3dc_kernel arg 0"); - status = clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set store kernel arg 0"); - - fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, fft3da_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fft3db_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, transpose3d_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clEnqueueTask(queue6, fft3dc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch third fft kernel"); - - status = 
clEnqueueTask(queue7, store_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch store transpose kernel"); - - // Wait for all command queues to complete pending events - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - status = clFinish(queue6); - checkError(status, "failed to finish"); - status = clFinish(queue7); - checkError(status, "failed to finish"); - - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - // Copy results from device to host - fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N * N, out, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish reading buffer using PCIe"); - - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - - queue_cleanup(); - - if (d_inData) - clReleaseMemObject(d_inData); - if (d_outData) - clReleaseMemObject(d_outData); - - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - if(fft3da_kernel) - clReleaseKernel(fft3da_kernel); - if(fft3db_kernel) - clReleaseKernel(fft3db_kernel); - if(fft3dc_kernel) - clReleaseKernel(fft3dc_kernel); - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - if(transpose3d_kernel) - clReleaseKernel(transpose3d_kernel); - if(store_kernel) - clReleaseKernel(store_kernel); - - fft_time.valid = 1; - return fft_time; -} - -/** - * \brief compute an out-of-place single precision complex 3D FFT using the DDR for 3D Transpose where the data access between the host and the FPGA is using Shared Virtual Memory (SVM) - * \param N : integer pointer addressing the size of FFT3d - * \param inp : float2 
pointer to input data of size [N * N * N] - * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param interleaving : 1 if using burst interleaved global memory buffers - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_int status = 0; - int num_pts = N * N * N; - - /* - const char* board_name; - int *bytes; - aocl_mmd_offline_info_t info_id; - info_id = AOCL_MMD_BOARD_NAMES; - aocl_mmd_get_offline_info(info_id, sizeof(char*), &board_name, size_t(int)); - - svm_handle = aocl_mmd_open(board_name); - if(svm_handle < 0 ){ - return NULL; - } - return aocl_mmd_shared_mem_alloc(svm_handle, sz, inData, device_ptr); - */ - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); -#endif - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); - checkError(status, "Failed to create fetch1 kernel"); - cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); - checkError(status, "Failed to create fft3da kernel"); - cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose kernel"); - cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); - checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); - checkError(status, "Failed to create store1 kernel"); - - cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); - checkError(status, "Failed to create fetch2 kernel"); - cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); - checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); - checkError(status, "Failed to create store2 kernel"); - - // Setup Queues to the kernels - queue_setup(); - - // Device memory buffers - cl_mem d_inOutData; - d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // allocate SVM buffers - float2 *h_inData, *h_outData; - h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); - h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_inData[i].x = inp[i].x; - 
h_inData[i].y = inp[i].y; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_outData[i].x = 0.0; - h_outData[i].y = 0.0; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); - checkError(status, "Failed to set fetch1 kernel arg"); - - status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); - - // kernel stores to DDR memory - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); - checkError(status, "Failed to set store1 kernel arg"); - - // kernel fetches from DDR memory - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); - checkError(status, "Failed to set fetch2 kernel arg"); - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - - // kernel stores using SVM based PCIe to host - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void*)h_outData); - checkError(status, "Failed to set store2 kernel arg"); - - fft_time.exec_t = getTimeinMilliSec(); - double first_half = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft 
kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - first_half = getTimeinMilliSec() - first_half; - - double second_half = getTimeinMilliSec(); - // enqueue fetch to same queue as the store kernel due to data dependency - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clFinish(queue5); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - /* - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue1); - checkError(status, "failed to finish"); - */ - - second_half = getTimeinMilliSec() - second_half; - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - printf("First half: %lf Second half: %lf\n\n", first_half, second_half); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, - (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map 
out data"); - - for(size_t i = 0; i < num_pts; i++){ - out[i].x = h_outData[i].x; - out[i].y = h_outData[i].y; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap out data"); - - if (h_inData) - clSVMFree(context, h_inData); - if (h_outData) - clSVMFree(context, h_outData); - - queue_cleanup(); - - if (d_inOutData) - clReleaseMemObject(d_inOutData); - - if(fetch1_kernel) - clReleaseKernel(fetch1_kernel); - if(fetch2_kernel) - clReleaseKernel(fetch2_kernel); - - if(ffta_kernel) - clReleaseKernel(ffta_kernel); - if(fftb_kernel) - clReleaseKernel(fftb_kernel); - if(fftc_kernel) - clReleaseKernel(fftc_kernel); - - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - - if(store1_kernel) - clReleaseKernel(store1_kernel); - if(store2_kernel) - clReleaseKernel(store2_kernel); - - fft_time.valid = 1; - return fft_time; -} - -/** - * \brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose - * \param N : integer pointer addressing the size of FFT3d - * \param inp : float2 pointer to input data of size [N * N * N] - * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param interleaving : 1 if using burst interleaved global memory buffers - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_int status = 0; - int num_pts = N * N * N; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); -#endif - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); - checkError(status, "Failed to create fetch1 kernel"); - cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); - checkError(status, "Failed to create fft3da kernel"); - cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose kernel"); - cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); - checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); - checkError(status, "Failed to create store1 kernel"); - - cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); - checkError(status, "Failed to create fetch2 kernel"); - cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); - checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); - checkError(status, "Failed to create store2 kernel"); - - // Setup Queues to the kernels - queue_setup(); - - // Device memory buffers - cl_mem d_inData, d_transpose, d_outData; - d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_transpose = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); - - status = 
clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish"); - - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set fetch1 kernel arg"); - - status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); - - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); - checkError(status, "Failed to set store1 kernel arg"); - - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); - checkError(status, "Failed to set fetch2 kernel arg"); - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); - checkError(status, "Failed to set store2 kernel arg"); - - fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - // enqueue fetch to same queue as the store kernel due to data dependency - 
status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clFinish(queue5); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue1); - checkError(status, "failed to finish"); - - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - // Copy results from device to host - fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish reading DDR using PCIe"); - - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - - queue_cleanup(); - - if (d_inData) - clReleaseMemObject(d_inData); - if (d_outData) - clReleaseMemObject(d_outData); - if (d_transpose) - clReleaseMemObject(d_transpose); - - if(fetch1_kernel) - clReleaseKernel(fetch1_kernel); - if(fetch2_kernel) - clReleaseKernel(fetch2_kernel); - - if(ffta_kernel) - clReleaseKernel(ffta_kernel); - if(fftb_kernel) - clReleaseKernel(fftb_kernel); - if(fftc_kernel) - clReleaseKernel(fftc_kernel); - - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - - if(store1_kernel) - clReleaseKernel(store1_kernel); - if(store2_kernel) - clReleaseKernel(store2_kernel); + // Create one command queue for each kernel. 
+ queue1 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue1"); + queue2 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue2"); + queue3 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue3"); + queue4 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue4"); + queue5 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue5"); + queue6 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue6"); + queue7 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue6"); + queue8 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); + checkError(status, "Failed to create command queue6"); +} - fft_time.valid = 1; - return fft_time; +/** + * \brief Release all command queues + */ +void queue_cleanup() { + if(queue1) + clReleaseCommandQueue(queue1); + if(queue2) + clReleaseCommandQueue(queue2); + if(queue3) + clReleaseCommandQueue(queue3); + if(queue4) + clReleaseCommandQueue(queue4); + if(queue5) + clReleaseCommandQueue(queue5); + if(queue6) + clReleaseCommandQueue(queue6); + if(queue7) + clReleaseCommandQueue(queue7); + if(queue8) + clReleaseCommandQueue(queue8); } /** @@ -1544,6 +204,7 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { * \param how_many : number of batched computations * \return fpga_t : time taken in milliseconds for data transfers and execution */ +/* fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many) { fpga_t fft_time = {0.0, 
0.0, 0.0, 0}; cl_int status = 0; @@ -1677,7 +338,6 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool clReleaseEvent(write_event[0]); // Loop over the 3 stages - for(size_t i = 2; i < how_many; i++){ // Unblocking transfers between DDR and host @@ -1870,347 +530,4 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool fft_time.valid = 1; return fft_time; } - -/** - * \brief compute an batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose and for data transfers between host's main memory and FPGA using Shared Virtual Memory - * \param N : integer pointer addressing the size of FFT3d - * \param inp : float2 pointer to input data of size [N * N * N] - * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param how_many : number of batched computations - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, bool inv, int how_many) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_int status = 0; - int num_pts = N * N * N; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 0)){ - return fft_time; - } - - if(!svm_enabled){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); -#endif - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); - checkError(status, "Failed to create fetch1 kernel"); - cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); - checkError(status, "Failed to create fft3da kernel"); - cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose kernel"); - cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); - checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); - checkError(status, "Failed to create store1 kernel"); - - cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); - checkError(status, "Failed to create fetch2 kernel"); - cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); - checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); - checkError(status, "Failed to create store2 kernel"); - - // Setup Queues to the kernels - queue_setup(); - - // Device memory buffers: double buffers - cl_mem d_outData_0; - d_outData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - cl_mem d_outData_1; - d_outData_1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // allocate and initialize SVM buffers - - float2 *h_inData[how_many], *h_outData[how_many]; - for(size_t i = 0; i < how_many; i++){ - h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); - h_outData[i] = (float2 *)clSVMAlloc(context, 
CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData[i], sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - size_t stride = i * num_pts; - for(size_t j = 0; j < num_pts; j++){ - h_inData[i][j].x = inp[stride + j].x; - h_inData[i][j].y = inp[stride + j].y; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_inData[i], 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - for(size_t j = 0; j < num_pts; j++){ - h_outData[i][j].x = 0.0; - h_outData[i][j].y = 0.0; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData[i], 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - } - - /* - * kernel arguments - */ - // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData[0]); - checkError(status, "Failed to set fetch1 kernel arg"); - - status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); - - // kernel stores to DDR memory - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set store1 kernel arg"); - - /* - * First batch write phase - */ - fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, 
NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - for(size_t i = 1; i < how_many; i++){ - - /* - * Read phase of previous iteration - */ - // kernel fetches from DDR memory - // kernel stores using SVM based PCIe to host - if( (i % 2) == 1){ - // if odd number of batches - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set fetch2 kernel arg"); - - // Start fetch2 phase with same queue as store1 - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - } - else{ - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); - checkError(status, "Failed to set fetch2 kernel arg"); - - // Start fetch2 phase with same queue as store1 - status = clEnqueueTask(queue8, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - } - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[i-1]); - checkError(status, "Failed to set store2 kernel arg"); - - status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue7, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - /* - * write phase of current iteration - */ - // change write phase host and ddr ptrs - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData[i]); - checkError(status, "Failed to set fetch1 kernel arg"); - if(i % 2 == 1){ - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), 
(void *)&d_outData_1); - checkError(status, "Failed to set store1 kernel arg"); - } - else{ - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set store1 kernel arg"); - } - - // Start write phase of current iteration - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - if(i % 2 == 1){ - status = clEnqueueTask(queue8, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - } - else{ - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - } - } - - if(how_many % 2 == 1){ - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set fetch2 kernel arg"); - - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - } - else{ - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); - checkError(status, "Failed to set fetch2 kernel arg"); - status = clEnqueueTask(queue8, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - } - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[how_many-1]); - checkError(status, "Failed to set store2 kernel arg"); - - status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to 
launch fft kernel"); - - status = clEnqueueTask(queue7, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clFinish(queue1); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue2); - checkError(status, "Failed to finish queue2"); - status = clFinish(queue3); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue4); - checkError(status, "Failed to finish queue2"); - status = clFinish(queue5); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue6); - checkError(status, "Failed to finish queue2"); - status = clFinish(queue7); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue8); - checkError(status, "Failed to finish queue2"); - - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - for(size_t i = 0; i < how_many; i++){ - - status = clEnqueueSVMMap(queue2, CL_TRUE, CL_MAP_READ, - (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map out data"); - - size_t stride = i * num_pts; - for(size_t j = 0; j < num_pts; j++){ - out[stride + j].x = h_outData[i][j].x; - out[stride + j].y = h_outData[i][j].y; - } - - status = clEnqueueSVMUnmap(queue2, (void *)h_outData[i], 0, NULL, NULL); - checkError(status, "Failed to unmap out data"); - } - - for(size_t i = 0; i < how_many; i++){ - clSVMFree(context, h_inData[i]); - clSVMFree(context, h_outData[i]); - } - - queue_cleanup(); - - if (d_outData_0) - clReleaseMemObject(d_outData_0); - if (d_outData_1) - clReleaseMemObject(d_outData_1); - - if(fetch1_kernel) - clReleaseKernel(fetch1_kernel); - if(fetch2_kernel) - clReleaseKernel(fetch2_kernel); - - if(ffta_kernel) - clReleaseKernel(ffta_kernel); - if(fftb_kernel) - clReleaseKernel(fftb_kernel); - if(fftc_kernel) - clReleaseKernel(fftc_kernel); - - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - - if(store1_kernel) - clReleaseKernel(store1_kernel); - if(store2_kernel) - 
clReleaseKernel(store2_kernel); - - fft_time.valid = 1; - return fft_time; -} - - -/** - * \brief Create a command queue for each kernel - */ -void queue_setup(){ - cl_int status = 0; - // Create one command queue for each kernel. - queue1 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - checkError(status, "Failed to create command queue1"); - queue2 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - checkError(status, "Failed to create command queue2"); - queue3 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - checkError(status, "Failed to create command queue3"); - queue4 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - checkError(status, "Failed to create command queue4"); - queue5 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - checkError(status, "Failed to create command queue5"); - queue6 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - checkError(status, "Failed to create command queue6"); - queue7 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - checkError(status, "Failed to create command queue6"); - queue8 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - checkError(status, "Failed to create command queue6"); -} - -/** - * \brief Release all command queues - */ -void queue_cleanup() { - if(queue1) - clReleaseCommandQueue(queue1); - if(queue2) - clReleaseCommandQueue(queue2); - if(queue3) - clReleaseCommandQueue(queue3); - if(queue4) - clReleaseCommandQueue(queue4); - if(queue5) - clReleaseCommandQueue(queue5); - if(queue6) - clReleaseCommandQueue(queue6); - if(queue7) - clReleaseCommandQueue(queue7); - if(queue8) - clReleaseCommandQueue(queue8); -} +*/ \ No newline at end of file diff --git a/api/src/fpga_state.h b/api/src/fpga_state.h new file mode 100644 index 0000000..9a0bde3 --- /dev/null +++ b/api/src/fpga_state.h @@ 
-0,0 +1,23 @@ +// Author: Arjun Ramaswami + +#ifndef KERNEL_VARS +#define KERNEL_VARS + +#include "CL/opencl.h" + +extern cl_platform_id platform; +extern cl_device_id *devices; +extern cl_device_id device; +extern cl_context context; +extern cl_program program; +extern cl_command_queue queue1, queue2, queue3; +extern cl_command_queue queue4, queue5, queue6; +extern cl_command_queue queue7, queue8; + +extern bool svm_enabled; +//extern int svm_handle; + +extern void queue_setup(); +extern void queue_cleanup(); + +#endif From 612f386d93dfcfeed7abb9509ceb75da617620b3 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 7 Dec 2020 17:06:58 +0100 Subject: [PATCH 36/76] Hw Counters and const inp --- .gitignore | 4 +- CMakeLists.txt | 10 +- api/include/fftfpga/fftfpga.h | 49 ++-- api/src/fft1d.c | 267 ++++++++++++++++- api/src/fft2d.c | 94 +++++- api/src/fft3d.c | 163 +++++++---- api/src/fftfpga.c | 340 +--------------------- examples/CMakeLists.txt | 2 +- examples/common/helper.c | 34 ++- examples/common/helper.h | 2 +- examples/common/verify_fftw.c | 2 +- examples/common/verify_fftw.h | 2 +- examples/fft1d.c | 15 +- examples/fft1d_batch.c | 130 +++++++++ examples/fft1d_svm.c | 14 +- examples/fft2d.c | 15 +- examples/fft2d_bram_svm.c | 131 +++++++++ examples/fft3d_bram.c | 12 +- examples/fft3d_ddr.c | 16 +- examples/fft3d_ddr_batch.c | 139 +++++++++ examples/fft3d_ddr_svm.c | 19 +- examples/fft3d_ddr_svm_batch.c | 23 +- examples/fft3d_svm.c | 15 +- kernels/CMakeLists.txt | 12 +- kernels/common/fft_config.h.in | 3 +- kernels/fft1d/fft1d.cl | 7 +- kernels/fft2d/fft2d_bram_opt.cl | 27 +- kernels/fft3d/fft3d_ddr.cl | 8 +- kernels/fft3d/fft3d_ddr_svm.cl | 499 ++++++++++++++++++++++++++++++++ tests/test_fft1d_fpga.cpp | 99 +++---- 30 files changed, 1578 insertions(+), 575 deletions(-) create mode 100644 examples/fft1d_batch.c create mode 100644 examples/fft2d_bram_svm.c create mode 100755 examples/fft3d_ddr_batch.c create mode 100755 kernels/fft3d/fft3d_ddr_svm.cl diff 
--git a/.gitignore b/.gitignore index 6284390..96087d3 100755 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -build/ -debug/ +build*/ +debug*/ test/ bin/ fpgabitstream/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 14b3340..dfbcc32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # Author: Arjun Ramaswami -cmake_minimum_required (VERSION 3.1) +cmake_minimum_required (VERSION 3.10.3) project(fft) @@ -31,13 +31,7 @@ find_package(IntelFPGAOpenCL REQUIRED) # Find FFTW set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/extern/findFFTW) - -find_package(FFTW) -if(FFTW_FOUND) - message("-- FFTW library found") -else() - message(WARNING, "FFTW library not found") -endif() +find_package(FFTW REQUIRED) # Link argparse as static library add_subdirectory(${CMAKE_SOURCE_DIR}/extern/argparse) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 47a8206..81dbb2f 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -30,9 +30,12 @@ typedef struct { * Record time in milliseconds of different FPGA runtime stages */ typedef struct fpga_timing { - double pcie_read_t; /**< Time to read from DDR to host using PCIe bus */ - double pcie_write_t; /**< Time to write from DDR to host using PCIe bus */ - double exec_t; /**< Kernel execution time */ + double pcie_read_t; /**< Time to read from DDR to host using PCIe bus */ + double pcie_write_t; /**< Time to write from DDR to host using PCIe bus */ + double exec_t; /**< Kernel execution time from CPU wall clock time*/ + double hw_pcie_read_t; /**< HW Counter Time to read from DDR to host using PCIe bus */ + double hw_pcie_write_t; /**< HW Counter Time to write from DDR to host using PCIe bus */ + double hw_exec_t; /**< Kernel execution time from HW counters*/ int valid; /**< Represents 1 signifying valid execution */ } fpga_t; @@ -78,9 +81,7 @@ extern void* fftfpgaf_complex_malloc(size_t sz); * @param iter : number of iterations of the N point FFT * 
@return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int iter); - -extern fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch); +extern fpga_t fftfpga_c2c_1d(int N, const double2 *inp, double2 *out, bool inv, int iter); /** * @brief compute an out-of-place single precision complex 1D-FFT on the FPGA @@ -91,7 +92,11 @@ extern fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int * @param iter : number of iterations of the N point FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int iter); +extern fpga_t fftfpgaf_c2c_1d(int N, const float2 *inp, float2 *out, bool inv, int iter); + +extern fpga_t fftfpgaf_c2c_1d_svm(int N, const float2 *inp, float2 *out, bool inv, int batch); + +extern fpga_t fftfpgaf_c2c_1d_batch(int N, const float2 *inp, float2 *out, bool inv, unsigned how_many); /** * @brief compute an out-of-place single precision complex 2D-FFT using the BRAM of the FPGA @@ -103,7 +108,7 @@ extern fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int ite * @param how_many : number of 2D FFTs to computer, default 1 * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many); +extern fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int how_many); /** * @brief compute an out-of-place single precision complex 2DFFT using the BRAM of the FPGA and Shared Virtual Memory for Host to Device Communication @@ -114,7 +119,7 @@ extern fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bo * @param how_many : number of 2D FFTs to computer, default 1 * @return fpga_t : time taken in milliseconds for data 
transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv, int how_many); +extern fpga_t fftfpgaf_c2c_2d_bram_svm(int N, const float2 *inp, float2 *out, bool inv, int how_many); /** * @brief compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA @@ -124,7 +129,7 @@ extern fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv * @param inv : int toggle to activate backward FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv); +extern fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv); /** * @brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA @@ -135,7 +140,7 @@ extern fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv); * @param interleaving : enable burst interleaved global memory buffers * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving); +extern fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, bool interleaving); /** * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA @@ -145,21 +150,31 @@ extern fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bo * @param inv : int toggle to activate backward FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv); +extern fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv); -extern fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many); +extern fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int 
how_many); /** * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA and Shared Virtual Memory for Host to Device Communication - * @param N : integer pointer addressing the size of FFT3d + * @param N : unsigned integer size of FFT3d * @param inp : float2 pointer to input data of size [N * N * N] * @param out : float2 pointer to output data of size [N * N * N] - * @param inv : int toggle to activate backward FFT + * @param inv : toggle to activate backward FFT + * @param interleaving : toggle interleaved device memory * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv); +extern fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, bool interleaving); -extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, bool inv, int how_many); +/** + * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA and Shared Virtual Memory for Host to Device Communication + * @param N : unsigned integer size of FFT3d + * @param inp : float2 pointer to input data of size [N * N * N] + * @param out : float2 pointer to output data of size [N * N * N] + * @param inv : toggle to activate backward FFT + * @param interleaving : toggle interleaved device memory + * @return fpga_t : time taken in milliseconds for data transfers and execution + */ +extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool inv, int how_many); #endif diff --git a/api/src/fft1d.c b/api/src/fft1d.c index efa9da9..900bddb 100644 --- a/api/src/fft1d.c +++ b/api/src/fft1d.c @@ -24,8 +24,8 @@ * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t 
fftfpga_c2c_1d(int N, const double2 *inp, double2 *out, bool inv, int batch){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_kernel fetch_kernel = NULL, fft_kernel = NULL; cl_int status = 0; @@ -48,9 +48,10 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device + cl_event writeBuf_event; fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(double2) * N * batch, inp, 0, NULL, NULL); + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(double2) * N * batch, inp, 0, NULL, &writeBuf_event); status = clFinish(queue1); checkError(status, "failed to finish writing buffer using PCIe"); @@ -58,6 +59,13 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; checkError(status, "Failed to copy data to device"); + cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; + + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); + + fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -85,10 +93,11 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ size_t gs = batch * ls; // Measure execution time + cl_event exec_event; fft_time.exec_t = getTimeinMilliSec(); // FFT1d kernel is the SWI kernel - status = clEnqueueTask(queue1, fft_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue1, fft_kernel, 0, NULL, &exec_event); checkError(status, "Failed to launch fft1d kernel"); status = clEnqueueNDRangeKernel(queue2, fetch_kernel, 1, NULL, &gs, &ls, 0, NULL, NULL); @@ -103,9 
+112,17 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ // Record execution time fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(exec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(exec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + // Copy results from device to host + cl_event readBuf_event; fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, &readBuf_event); status = clFinish(queue1); checkError(status, "failed to finish reading buffer using PCIe"); @@ -113,6 +130,12 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); + cl_ulong readBuf_start = 0, readBuf_end = 0; + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); + + fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + // Cleanup if (d_inData) clReleaseMemObject(d_inData); @@ -136,9 +159,9 @@ fpga_t fftfpga_c2c_1d(int N, double2 *inp, double2 *out, bool inv, int batch){ * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int batch){ +fpga_t fftfpgaf_c2c_1d(int N, const float2 *inp, float2 *out, bool inv, int batch){ - fpga_t fft_time 
= {0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_kernel kernel1 = NULL, kernel2 = NULL; cl_int status = 0; @@ -253,8 +276,8 @@ fpga_t fftfpgaf_c2c_1d(int N, float2 *inp, float2 *out, bool inv, int batch){ * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_1d_svm(int N, const float2 *inp, float2 *out, bool inv, int batch){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * batch; @@ -368,4 +391,230 @@ fpga_t fftfpgaf_c2c_1d_svm(int N, float2 *inp, float2 *out, bool inv, int batch) return fft_time; } +/** + * \brief compute an out-of-place batched single precision complex 1D-FFT on the FPGA + * \param N : integer pointer to size of FFT3d + * \param inp : float2 pointer to input data of size N + * \param out : float2 pointer to output data of size N + * \param inv : true for backward transforms + * \param how_many : number of batched executions of 1D FFT + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_1d_batch(int N, const float2 *inp, float2 *out, bool inv, unsigned how_many){ + + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + cl_kernel fetch_kernel = NULL, fft_kernel = NULL; + cl_int status = 0; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 1)){ + return fft_time; + } + + queue_setup(); + // Device Buffers + cl_mem d_inData[3], d_outData[3]; + d_inData[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * N, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_inData[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N, NULL, &status); + 
checkError(status, "Failed to allocate input device buffer\n"); + + d_inData[2] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * N, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_outData[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * N, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_outData[1] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_outData[2] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * N, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + cl_event writeEvent[2]; + + fft_time.pcie_write_t = getTimeinMilliSec(); + + clEnqueueWriteBuffer(queue1, d_inData[0], CL_TRUE, 0, sizeof(float2) * N, inp, 0, NULL, NULL); + + for(size_t i = 1; i < how_many; i++){ + clEnqueueWriteBuffer(queue2, d_inData[i%2], CL_FALSE, 0, sizeof(float2) * N, &inp[i * N], 0, NULL, &writeEvent[0]); + + status = clEnqueueReadBuffer(queue3, d_outData[(i-1)%2], CL_FALSE, 0, sizeof(float2) * N, &out[(i-1) * N], 0, NULL, &writeEvent[1]); + checkError(status, "Failed to read"); + + clFinish(queue2); + clFinish(queue3); + + clWaitForEvents(2, writeEvent); + clReleaseEvent(writeEvent[0]); + clReleaseEvent(writeEvent[1]); + } + + status = clEnqueueReadBuffer(queue3, d_outData[(how_many-1) % 2], CL_FALSE, 0, sizeof(float2) * N, &out[(how_many - 1) * N], 0, NULL, &writeEvent[0]); + checkError(status, "Failed to read"); + + clFinish(queue3); + clWaitForEvents(1, &writeEvent[0]); + clReleaseEvent(writeEvent[0]); + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + // Cleanup + if (d_inData[0]) + clReleaseMemObject(d_inData[0]); + if (d_inData[1]) + clReleaseMemObject(d_inData[1]); + if (d_inData[2]) + 
clReleaseMemObject(d_inData[2]); + + if (d_outData[0]) + clReleaseMemObject(d_outData[0]); + if (d_outData[1]) + clReleaseMemObject(d_outData[1]); + if (d_outData[2]) + clReleaseMemObject(d_outData[2]); + + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(fft_kernel) + clReleaseKernel(fft_kernel); + + queue_cleanup(); + + fft_time.valid = 1; + return fft_time; + +} + +/* +fpga_t fftfpgaf_c2c_1d_batch(int N, float2 *inp, float2 *out, bool inv, unsigned how_many){ + + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + cl_kernel fetch_kernel = NULL, fft_kernel = NULL; + cl_mem d_inData[2], d_outData[2]; + cl_int status = 0; + int batch = 1; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 1)){ + return fft_time; + } + + queue_setup(); + + // Device Buffers + d_inData[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * N, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_inData[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); + + d_outData[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * N, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + d_outData[1] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // Kernels + fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); + + fft_kernel = clCreateKernel(program, "fft1d", &status); + checkError(status, "Failed to create fft1d kernel"); + + // Set the kernel arguments + cl_event readEvent[how_many], writeEvent[how_many]; + cl_event kernelEvent1[how_many], kernelEvent2[how_many]; + cl_event wr_dep[2], 
rd_dep[2], kernel_dep[2]; + + int inverse_int = (int)inv; + size_t ls = N / 8; + size_t gs = ls; + //size_t gs = batch * ls; + + fft_time.pcie_write_t = getTimeinMilliSec(); + + for(size_t i = 0; i < how_many; i++){ + if(i < 2){ + clEnqueueWriteBuffer(queue1, d_inData[i%2], CL_FALSE, 0, sizeof(float2) * N, &inp[i * N], 0, NULL, &writeEvent[i]); + clFlush(queue1); + + status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem *), &d_inData[i%2]); + checkError(status, "Failed to set fetch kernel arg 0"); + status = clSetKernelArg(fft_kernel, 0, sizeof(cl_mem *), &d_outData[i%2]); + checkError(status, "Failed to set fft kernel arg 0"); + status = clSetKernelArg(fft_kernel, 1, sizeof(cl_int), &batch); + checkError(status, "Failed to set fft kernel arg 1"); + status = clSetKernelArg(fft_kernel, 2, sizeof(cl_int), &inverse_int); + checkError(status, "Failed to set fft kernel arg 2"); + + status = clEnqueueTask(queue2, fft_kernel, 1, &writeEvent[i], &kernelEvent1[i]); + checkError(status, "Failed to launch fft1d kernel"); + + status = clEnqueueNDRangeKernel(queue3, fetch_kernel, 1, NULL, &gs, &ls, 1, &writeEvent[i], &kernelEvent2[i]); + checkError(status, "Failed to launch fetch kernel"); + + clFlush(queue2); + clFlush(queue3); + } + else{ + wr_dep[0] = kernelEvent1[i - 2]; + wr_dep[1] = kernelEvent2[i - 2]; + + clEnqueueWriteBuffer(queue1, d_inData[i%2], CL_FALSE, 0, sizeof(float2) * N, &inp[i * N], 2, wr_dep, &writeEvent[i]); + clFlush(queue1); + + kernel_dep[0] = writeEvent[i]; + kernel_dep[1] = readEvent[i-2]; + + status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem *), &d_inData[i%2]); + checkError(status, "Failed to set fetch kernel arg 0"); + status = clSetKernelArg(fft_kernel, 0, sizeof(cl_mem *), &d_outData[i%2]); + checkError(status, "Failed to set fft kernel arg 0"); + status = clSetKernelArg(fft_kernel, 1, sizeof(cl_int), &batch); + checkError(status, "Failed to set fft kernel arg 1"); + status = clSetKernelArg(fft_kernel, 2, sizeof(cl_int), &inverse_int); + 
checkError(status, "Failed to set fft kernel arg 2"); + + status = clEnqueueTask(queue2, fft_kernel, 2, kernel_dep, &kernelEvent1[i]); + checkError(status, "Failed to launch fft1d kernel"); + + status = clEnqueueNDRangeKernel(queue3, fetch_kernel, 1, NULL, &gs, &ls, 2, kernel_dep, &kernelEvent2[i]); + checkError(status, "Failed to launch fetch kernel"); + + clFlush(queue2); + clFlush(queue3); + } + rd_dep[0] = kernelEvent1[i]; + rd_dep[1] = kernelEvent2[i]; + + status = clEnqueueReadBuffer(queue4, d_outData[i%2], CL_FALSE, 0, sizeof(float2) * N, &out[i*N], 2, rd_dep, &readEvent[i]); + checkError(status, "Failed to read"); + clFlush(queue4); + } + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + // Cleanup + if (d_inData[0]) + clReleaseMemObject(d_inData[0]); + if (d_inData[1]) + clReleaseMemObject(d_inData[1]); + if (d_outData[0]) + clReleaseMemObject(d_outData[0]); + if (d_outData[1]) + clReleaseMemObject(d_outData[1]); + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(fft_kernel) + clReleaseKernel(fft_kernel); + + queue_cleanup(); + + fft_time.valid = 1; + return fft_time; +} +*/ \ No newline at end of file diff --git a/api/src/fft2d.c b/api/src/fft2d.c index 9a0ad0c..e68f908 100644 --- a/api/src/fft2d.c +++ b/api/src/fft2d.c @@ -24,8 +24,8 @@ * \param iter : int toggle to activate backward FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_kernel fetch_kernel = NULL, fft_kernel = NULL, transpose_kernel = NULL; cl_int status = 0; int mangle_int = 0; @@ -51,9 +51,10 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device + cl_event writeBuf_event; 
fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N, inp, 0, NULL, NULL); + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N, inp, 0, NULL, &writeBuf_event); status = clFinish(queue1); checkError(status, "failed to finish writing buffer using PCIe"); @@ -61,6 +62,13 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; checkError(status, "Failed to copy data to device"); + cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; + + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); + + fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -72,6 +80,7 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ transpose_kernel = clCreateKernel(program, "transpose", &status); checkError(status, "Failed to create kernel"); + cl_event startExec_event[2], endExec_event[2]; // Record execution time fft_time.exec_t = getTimeinMilliSec(); @@ -85,7 +94,7 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ checkError(status, "Failed to set kernel arg 1"); size_t lws_fetch[] = {N}; size_t gws_fetch[] = {N * N / 8}; - status = clEnqueueNDRangeKernel(queue1, fetch_kernel, 1, 0, gws_fetch, lws_fetch, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(queue1, fetch_kernel, 1, 0, gws_fetch, lws_fetch, 0, NULL, (i % 2) == 0 ? 
&startExec_event[0] : &startExec_event[1]); checkError(status, "Failed to launch kernel"); // Launch the fft kernel - we launch a single work item hence enqueue a task @@ -103,7 +112,7 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ size_t lws_transpose[] = {N}; size_t gws_transpose[] = {N * N / 8}; - status = clEnqueueNDRangeKernel(queue3, transpose_kernel, 1, 0, gws_transpose, lws_transpose, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(queue3, transpose_kernel, 1, 0, gws_transpose, lws_transpose, 0, NULL, (i % 2) == 0 ? &endExec_event[0] : &endExec_event[1]); checkError(status, "Failed to launch kernel"); // Wait for all command queues to complete pending events @@ -117,9 +126,22 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event[0], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event[0], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + + clGetEventProfilingInfo(startExec_event[1], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event[1], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.hw_exec_t += (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + // Copy results from device to host + cl_event readBuf_event; fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N, out, 0, NULL, NULL); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N, out, 0, NULL, &readBuf_event); status = clFinish(queue1); checkError(status, "failed to finish reading buffer using PCIe"); @@ -127,6 +149,12 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, 
float2 *inp, float2 *out, bool inv){ fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); + cl_ulong readBuf_start = 0, readBuf_end = 0; + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); + + fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + // Cleanup if (d_inData) clReleaseMemObject(d_inData); @@ -155,8 +183,8 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, float2 *inp, float2 *out, bool inv){ * \param interleaving : enable interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int how_many){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; cl_kernel fetch_kernel = NULL, store_kernel = NULL; cl_kernel transpose_kernel = NULL; @@ -194,9 +222,10 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device + cl_event writeBuf_event; fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, &writeBuf_event); status = clFinish(queue1); checkError(status, "failed to finish"); @@ -204,6 +233,13 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; 
checkError(status, "Failed to copy data to device"); + cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; + + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); + + fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -249,8 +285,11 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte status = clSetKernelArg(store_kernel, 1, sizeof(cl_int), (void *)&how_many); checkError(status, "Failed to set store kernel arg"); + // Kernel Execution + cl_event startExec_event, endExec_event; + fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); @@ -262,7 +301,7 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, store_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue5, store_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch store kernel"); // Wait for all command queues to complete pending events @@ -278,9 +317,18 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte checkError(status, "failed to finish queue5"); fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, 
sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + // Copy results from device to host + cl_event readBuf_event; + fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, &readBuf_event); status = clFinish(queue1); checkError(status, "failed to finish reading buffer using PCIe"); @@ -288,6 +336,12 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); + cl_ulong readBuf_start = 0, readBuf_end = 0; + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); + + fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + queue_cleanup(); if (d_inData) @@ -321,8 +375,8 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte * \param inv : int toggle to activate backward FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv, int how_many){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_2d_bram_svm(int N, const float2 *inp, float2 *out, bool inv, int how_many){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = how_many * N * N; @@ -417,8 +471,10 @@ fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv, int h status = clSetKernelArg(store_kernel, 1, sizeof(cl_int), (void *)&how_many); checkError(status, "Failed to set store kernel arg"); + cl_event startExec_event, 
endExec_event; + fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); @@ -430,7 +486,7 @@ fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv, int h status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, store_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue5, store_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch store kernel"); // Wait for all command queues to complete pending events @@ -446,6 +502,12 @@ fpga_t fftfpgaf_c2c_2d_bram_svm(int N, float2 *inp, float2 *out, bool inv, int h checkError(status, "failed to finish queue4"); fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + cl_ulong kernel_start = 0, kernel_end = 0; + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map out data"); diff --git a/api/src/fft3d.c b/api/src/fft3d.c index e5e8d51..772fa53 100644 --- a/api/src/fft3d.c +++ b/api/src/fft3d.c @@ -25,8 +25,8 @@ * \param interleaving : 1 if using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool interleaving) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, 
bool interleaving) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; cl_kernel fft3da_kernel = NULL, fft3db_kernel = NULL, fft3dc_kernel = NULL; @@ -61,10 +61,11 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte d_outData = clCreateBuffer(context, flagbuf2, sizeof(float2) * N * N * N, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); + cl_event writeBuf_event; // Copy data from host to device fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N * N, inp, 0, NULL, NULL); + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N * N, inp, 0, NULL, &writeBuf_event); status = clFinish(queue1); checkError(status, "failed to finish"); @@ -72,6 +73,13 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; checkError(status, "Failed to copy data to device"); + cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; + + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); + + fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -109,8 +117,11 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte status = clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData); checkError(status, "Failed to set store kernel arg 0"); + // Kernel Execution + cl_event startExec_event, endExec_event; + fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); 
checkError(status, "Failed to launch fetch kernel"); status = clEnqueueTask(queue2, fft3da_kernel, 0, NULL, NULL); @@ -128,7 +139,7 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte status = clEnqueueTask(queue6, fft3dc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch third fft kernel"); - status = clEnqueueTask(queue7, store_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch store transpose kernel"); // Wait for all command queues to complete pending events @@ -149,9 +160,17 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + // Copy results from device to host + cl_event readBuf_event; fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N * N, out, 0, NULL, NULL); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N * N, out, 0, NULL, &readBuf_event); status = clFinish(queue1); checkError(status, "failed to finish reading buffer using PCIe"); @@ -159,6 +178,12 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); + cl_ulong readBuf_start = 0, readBuf_end = 0; + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), 
&readBuf_end, NULL); + + fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + queue_cleanup(); if (d_inData) @@ -194,34 +219,16 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, float2 *inp, float2 *out, bool inv, bool inte * \param interleaving : 1 if using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, bool interleaving) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; - /* - const char* board_name; - int *bytes; - aocl_mmd_offline_info_t info_id; - info_id = AOCL_MMD_BOARD_NAMES; - aocl_mmd_get_offline_info(info_id, sizeof(char*), &board_name, size_t(int)); - - svm_handle = aocl_mmd_open(board_name); - if(svm_handle < 0 ){ - return NULL; - } - return aocl_mmd_shared_mem_alloc(svm_handle, sz, inData, device_ptr); - */ - // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ return fft_time; } -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); -#endif - // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -249,22 +256,33 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { // Device memory buffers cl_mem d_inOutData; - d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); + if(!interleaving){ + d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + } + else{ + d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + } // allocate SVM buffers float2 *h_inData, *h_outData; h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + double svmBufferCopyIn_timer = getTimeinMilliSec(); status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); // copy data into h_inData + size_t num_bytes = num_pts * sizeof(float2); + memcpy(h_inData, inp, num_bytes); + /* for(size_t i = 0; i < num_pts; i++){ h_inData[i].x = inp[i].x; h_inData[i].y = inp[i].y; } + */ status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); checkError(status, "Failed to unmap input data"); @@ -281,6 +299,9 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); checkError(status, "Failed to unmap input data"); + svmBufferCopyIn_timer = getTimeinMilliSec() - svmBufferCopyIn_timer; + printf("\nSVM Buffer Copy In Time: %lfms\n", svmBufferCopyIn_timer); + // write to fetch 
kernel using SVM based PCIe status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); checkError(status, "Failed to set fetch1 kernel arg"); @@ -304,9 +325,10 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { status = clSetKernelArgSVMPointer(store2_kernel, 0, (void*)h_outData); checkError(status, "Failed to set store2 kernel arg"); + cl_event startExec_event, endExec_event; + fft_time.exec_t = getTimeinMilliSec(); - double first_half = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); @@ -321,27 +343,13 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second transpose kernel"); - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - first_half = getTimeinMilliSec() - first_half; - - double second_half = getTimeinMilliSec(); - // enqueue fetch to same queue as the store kernel due to data dependency status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch transpose kernel"); status = clFinish(queue5); @@ -350,30 +358,39 @@ fpga_t 
fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { checkError(status, "failed to finish"); status = clFinish(queue3); checkError(status, "failed to finish"); - /* status = clFinish(queue2); checkError(status, "failed to finish"); status = clFinish(queue1); checkError(status, "failed to finish"); - */ - second_half = getTimeinMilliSec() - second_half; fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - printf("First half: %lf Second half: %lf\n\n", first_half, second_half); + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + + double svmBufferCopyout_timer = getTimeinMilliSec(); status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map out data"); + memcpy(out, h_outData, num_bytes); + /* for(size_t i = 0; i < num_pts; i++){ out[i].x = h_outData[i].x; out[i].y = h_outData[i].y; } + */ status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); checkError(status, "Failed to unmap out data"); + svmBufferCopyout_timer = getTimeinMilliSec() - svmBufferCopyout_timer; + printf("SVM Buffer Copy Out Time: %lfms\n\n", svmBufferCopyout_timer); + if (h_inData) clSVMFree(context, h_inData); if (h_outData) @@ -417,8 +434,8 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, float2 *inp, float2 *out, bool inv) { * \param interleaving : 1 if using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { + fpga_t fft_time = 
{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; @@ -468,9 +485,10 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device + cl_event writeBuf_event; fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, &writeBuf_event); status = clFinish(queue1); checkError(status, "failed to finish"); @@ -478,6 +496,13 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; checkError(status, "Failed to copy data to device"); + cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; + + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); + + fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); checkError(status, "Failed to set fetch1 kernel arg"); @@ -496,8 +521,11 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); checkError(status, "Failed to set store2 kernel arg"); + // Kernel Execution + cl_event startExec_event, endExec_event; + fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); @@ -519,7 +547,7 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, 
float2 *inp, float2 *out, bool inv) { status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue3, store2_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch transpose kernel"); status = clFinish(queue5); @@ -535,9 +563,17 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + // Copy results from device to host + cl_event readBuf_event; fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, NULL); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, &readBuf_event); status = clFinish(queue1); checkError(status, "failed to finish reading DDR using PCIe"); @@ -545,6 +581,12 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; checkError(status, "Failed to copy data from device"); + cl_ulong readBuf_start = 0, readBuf_end = 0; + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); + + fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + queue_cleanup(); if (d_inData) @@ -589,8 +631,8 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, float2 *inp, float2 *out, bool inv) { * 
\param how_many : number of batched computations * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int how_many) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; @@ -730,6 +772,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool checkError(status, "Failed to launch transpose kernel"); // Check finish of transfer and computations + /* status = clFinish(queue6); checkError(status, "failed to finish"); status = clFinish(queue5); @@ -742,7 +785,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool checkError(status, "failed to finish"); status = clFinish(queue1); checkError(status, "failed to finish"); - + */ clWaitForEvents(1, &write_event[0]); clReleaseEvent(write_event[0]); @@ -1063,8 +1106,8 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool * \param how_many : number of batched computations * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, float2 *inp, float2 *out, bool inv, int how_many) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool inv, int how_many) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 95f3959..c7fe599 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -192,342 +192,4 @@ void queue_cleanup() { clReleaseCommandQueue(queue7); if(queue8) clReleaseCommandQueue(queue8); -} - -/** - * \brief compute an batched out-of-place single precision complex 3D-FFT using 
the DDR of the FPGA for 3D Transpose - * \param N : integer pointer addressing the size of FFT3d - * \param inp : float2 pointer to input data of size [N * N * N] - * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param interleaving : enable burst interleaved global memory buffers - * \param how_many : number of batched computations - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -/* -fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, float2 *inp, float2 *out, bool inv, bool interleaving, int how_many) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - cl_int status = 0; - int num_pts = N * N * N; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 1)){ - return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR for Batched execution\n", inv ? " inverse":""); -#endif - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); - checkError(status, "Failed to create fetch1 kernel"); - cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); - checkError(status, "Failed to create fft3da kernel"); - cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose kernel"); - cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); - checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); - checkError(status, "Failed to create store1 kernel"); - - cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); - checkError(status, "Failed to create fetch2 kernel"); - cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); - checkError(status, "Failed to create fft3dc kernel"); - cl_kernel 
store2_kernel = clCreateKernel(program, "transposeStore2", &status); - checkError(status, "Failed to create store2 kernel"); - - // Setup Queues to the kernels - queue_setup(); - - // Device memory buffers: using 1st and 2nd banks - // Double Buffers, using 3rd and 4th banks - cl_mem d_inData1, d_inData2, d_outData1, d_outData2; - d_inData1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_inData2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_outData1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - d_outData2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - cl_mem d_transpose; - d_transpose = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // Default Kernel Arguments - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); - checkError(status, "Failed to set fetch1 kernel arg"); - status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); - checkError(status, "Failed to set store1 kernel arg"); - - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); - checkError(status, "Failed 
to set fetch2 kernel arg"); - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); - checkError(status, "Failed to set store2 kernel arg"); - - fft_time.exec_t = getTimeinMilliSec(); - - // First Phase - // Write to DDR first buffer - status = clEnqueueWriteBuffer(queue1, d_inData1, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); - - status = clFinish(queue1); - checkError(status, "failed to finish"); - - // Second Phase - // Unblocking write to DDR second buffer from index num_pts - cl_event write_event[2]; - status = clEnqueueWriteBuffer(queue6, d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, (void*)&inp[num_pts], 0, NULL, &write_event[0]); - checkError(status, "Failed to write to DDR buffer"); - - // Compute First FFT already transferred - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - // Check finish of transfer and computations - status = clFinish(queue6); - 
checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue1); - checkError(status, "failed to finish"); - - clWaitForEvents(1, &write_event[0]); - clReleaseEvent(write_event[0]); - - // Loop over the 3 stages - for(size_t i = 2; i < how_many; i++){ - - // Unblocking transfers between DDR and host - if( (i % 2) == 0){ - status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[((i - 2) * num_pts)], 0, NULL, &write_event[0]); - checkError(status, "Failed to read from DDR buffer"); - - status = clEnqueueWriteBuffer(queue7, d_inData1, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[(i * num_pts)], 0, NULL, &write_event[1]); - checkError(status, "Failed to write to DDR buffer"); - - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); - checkError(status, "Failed to set fetch1 kernel arg"); - - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); - checkError(status, "Failed to set store2 kernel arg"); - } - else{ - status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[((i - 2) * num_pts)], 0, NULL, &write_event[0]); - checkError(status, "Failed to read from DDR buffer"); - - status = clEnqueueWriteBuffer(queue7, d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[(i * num_pts)], 0, NULL, &write_event[1]); - checkError(status, "Failed to write to DDR buffer"); - - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); - checkError(status, "Failed to set fetch1 kernel arg"); - - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); - checkError(status, "Failed to set store2 kernel arg"); - } - - // Set Kernel Arguments 
before execution - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clFinish(queue7); - checkError(status, "failed to finish"); - status = clFinish(queue6); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue1); - checkError(status, "failed to finish"); - - clWaitForEvents(2, write_event); - clReleaseEvent(write_event[0]); - clReleaseEvent(write_event[1]); - } - - if( (how_many % 2) == 0){ - status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); - checkError(status, "Failed to read from DDR buffer"); - - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); - checkError(status, "Failed to set fetch1 kernel arg"); - - 
status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); - checkError(status, "Failed to set store2 kernel arg"); - } - else{ - status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); - checkError(status, "Failed to read from DDR buffer"); - - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); - checkError(status, "Failed to set fetch1 kernel arg"); - - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); - checkError(status, "Failed to set store2 kernel arg"); - } - - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clFinish(queue6); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = 
clFinish(queue1); - checkError(status, "failed to finish"); - - clWaitForEvents(1, &write_event[0]); - clReleaseEvent(write_event[0]); - - if( (how_many % 2) == 0){ - status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[0]); - checkError(status, "Failed to read from DDR buffer"); - } - else{ - status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[0]); - checkError(status, "Failed to read from DDR buffer"); - } - - status = clFinish(queue6); - checkError(status, "failed to finish reading DDR using PCIe"); - - clWaitForEvents(1, &write_event[0]); - clReleaseEvent(write_event[0]); - - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - checkError(status, "Failed to copy data from device"); - - queue_cleanup(); - - if (d_inData1) - clReleaseMemObject(d_inData1); - if (d_inData2) - clReleaseMemObject(d_inData2); - - if (d_outData2) - clReleaseMemObject(d_outData2); - if (d_outData2) - clReleaseMemObject(d_outData2); - - if (d_transpose) - clReleaseMemObject(d_transpose); - - if(fetch1_kernel) - clReleaseKernel(fetch1_kernel); - if(fetch2_kernel) - clReleaseKernel(fetch2_kernel); - - if(ffta_kernel) - clReleaseKernel(ffta_kernel); - if(fftb_kernel) - clReleaseKernel(fftb_kernel); - if(fftc_kernel) - clReleaseKernel(fftc_kernel); - - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - - if(store1_kernel) - clReleaseKernel(store1_kernel); - if(store2_kernel) - clReleaseKernel(store2_kernel); - - fft_time.valid = 1; - return fft_time; -} -*/ \ No newline at end of file +} \ No newline at end of file diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 3986922..ee8b145 100755 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,7 +4,7 @@ project(examplesfftfpga VERSION 0.1 DESCRIPTION "Example Code that uses libfftfpga" LANGUAGES C CXX) -set(examples 
fft3d_ddr fft3d_bram fft3d_ddr_svm fft3d_ddr_batch fft3d_ddr_svm_batch fft2d fft1d fft1d_svm) +set(examples fft3d_ddr fft3d_bram fft3d_ddr_svm fft3d_ddr_batch fft3d_ddr_svm_batch fft2d fft1d fft1d_svm fft1d_batch) # create a target for each of the example foreach(example ${examples}) diff --git a/examples/common/helper.c b/examples/common/helper.c index 2769648..f88e2d7 100755 --- a/examples/common/helper.c +++ b/examples/common/helper.c @@ -87,7 +87,7 @@ void print_config(int N, int dim, int iter, bool inv, bool sp, int batch, bool u * \param inv: true if backward transform * \param single precision floating point transformation */ -void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec_t, int N, int dim, int iter, int batch, bool inv, bool sp){ +void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec_t, double hw_pcie_rd, double hw_pcie_wr, double hw_exec, int N, int dim, int iter, int batch, bool inv, bool sp){ double avg_api_time = 0.0; @@ -99,7 +99,11 @@ void display_measures(double total_api_time, double pcie_rd, double pcie_wr, dou double pcie_write = pcie_wr / iter; double exec = exec_t / iter; - double gpoints_per_sec = (batch * pow(N, dim)) / (exec * 1e-3 * 1024 * 1024 * 1024); + double hw_pcie_read = hw_pcie_rd / iter; + double hw_pcie_write = hw_pcie_wr / iter; + double hw_execution = hw_exec / iter; + + double gpoints_per_sec = (batch * pow(N, dim)) / (hw_execution * 1e-3 * 1024 * 1024 * 1024); double gBytes_per_sec = 0.0; if(sp){ @@ -114,19 +118,23 @@ void display_measures(double total_api_time, double pcie_rd, double pcie_wr, dou printf("\n\n------------------------------------------\n"); printf("Measurements \n"); printf("--------------------------------------------\n"); - printf("Points = %d%s \n", N, dim == 1 ? "" : dim == 2 ? "^2" : "^3"); - printf("Precision = %s\n", sp ? "Single": "Double"); - printf("Direction = %s\n", inv ? 
"Backward":"Forward"); - printf("Iterations = %d\n", iter); - printf("Batch = %d\n", batch); + printf("Points = %d%s \n", N, dim == 1 ? "" : dim == 2 ? "^2" : "^3"); + printf("Precision = %s\n", sp ? "Single": "Double"); + printf("Direction = %s\n", inv ? "Backward":"Forward"); + printf("Iterations = %d\n", iter); + printf("Batch = %d\n", batch); printf("%s", iter>1 ? "Average Measurements of iterations\n":""); - printf("PCIe Write = %.2lfms\n", pcie_write); - printf("Kernel Execution = %.2lfms\n", exec); - printf("PCIe Read = %.2lfms\n", pcie_read); - printf("Total = %.2lfms\n", pcie_read + exec + pcie_write); - printf("Throughput = %.2lfGFLOPS/s | %.2lf GB/s\n", gflops, gBytes_per_sec); - printf("API runtime = %.2lfms\n", avg_api_time); + printf("PCIe Write = %.4lfms\n", pcie_write); + printf("Kernel Execution = %.4lfms\n", exec); + printf("PCIe Read = %.4lfms\n", pcie_read); + printf("Total = %.4lfms\n", pcie_read + exec + pcie_write); + printf("HW PCIe Write = %.4lfms\n", hw_pcie_write); + printf("HW Kernel Execution = %.4lfms\n", hw_execution); + printf("HW PCIe Read = %.4lfms\n", hw_pcie_read); + printf("Hw Total = %.4lfms\n", hw_pcie_write + hw_execution + hw_pcie_read); + printf("Throughput = %.4lfGFLOPS/s | %.4lf GB/s\n", gflops, gBytes_per_sec); + printf("API runtime = %.4lfms\n", avg_api_time); } /** diff --git a/examples/common/helper.h b/examples/common/helper.h index 8ea0ba7..36c99bb 100755 --- a/examples/common/helper.h +++ b/examples/common/helper.h @@ -12,7 +12,7 @@ bool fft_create_data(double2 *inp, int N); void print_config(int N, int dim, int iter, bool inv, bool sp, int batch, bool use_bram, bool interleaving); -void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, int N, int dim, int iter, int batch, bool inv, bool sp); +void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, double avg_shw_pcie_rd, double avg_hw_pcie_wr, double avg_hw_exec, int N, int dim, int iter, int 
batch, bool inv, bool sp); double getTimeinMilliseconds(); #endif // HELPER_H diff --git a/examples/common/verify_fftw.c b/examples/common/verify_fftw.c index 286962d..c0e2653 100644 --- a/examples/common/verify_fftw.c +++ b/examples/common/verify_fftw.c @@ -70,7 +70,7 @@ bool verify_fftwf(float2 *fpgaout, float2 *verify, int N, int dim, bool inverse, mag_sum += magnitude; noise_sum += noise; #ifndef NDEBUG - printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); + //printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); #endif } diff --git a/examples/common/verify_fftw.h b/examples/common/verify_fftw.h index 4a7c759..c31107c 100644 --- a/examples/common/verify_fftw.h +++ b/examples/common/verify_fftw.h @@ -5,6 +5,6 @@ #include -bool verify_fftwf(float2 *fpgaout, float2 *verify, int N, int dim, bool inverse, int how_many); +bool verify_fftwf(float2 *fpgaout, const float2 *verify, int N, int dim, bool inverse, int how_many); #endif // FFT3D_FFTW_H \ No newline at end of file diff --git a/examples/fft1d.c b/examples/fft1d.c index f45b0a3..2ae3e33 100644 --- a/examples/fft1d.c +++ b/examples/fft1d.c @@ -25,8 +25,9 @@ int main(int argc, const char **argv) { char *path = "fft1d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; - fpga_t timing = {0.0, 0.0, 0.0, 0}; + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; struct argparse_option options[] = { @@ -93,11 +94,19 @@ int main(int argc, const char **argv) { avg_rd += timing.pcie_read_t; avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; printf("Iter: %lu\n", i); printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); 
printf("\tKernel: %lfms\n", timing.exec_t); printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); } // destroy FFT input and output @@ -107,8 +116,8 @@ int main(int argc, const char **argv) { // destroy data fpga_final(); - - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + // display performance measures + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/examples/fft1d_batch.c b/examples/fft1d_batch.c new file mode 100644 index 0000000..c3d8e30 --- /dev/null +++ b/examples/fft1d_batch.c @@ -0,0 +1,130 @@ +// Author: Arjun Ramaswami + +#include +#include // EXIT_FAILURE +#include +#include + +#include "CL/opencl.h" +#include "fftfpga/fftfpga.h" + +#include "argparse.h" +#include "helper.h" +#include "verify_fftw.h" + +static const char *const usage[] = { + "bin/host [options]", + NULL, +}; + +int main(int argc, const char **argv) { + int N = 64, dim = 1, iter = 1, batch = 1; + + bool use_bram = false, sp = true, inv = false, use_svm = false, interleaving = false; + bool status = true, use_emulator = false; + + char *path = "fft1d_emulate.aocx"; + const char *platform = "Intel(R) FPGA"; + + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; + double temp_timer = 0.0, total_api_time = 0.0; + + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic Options"), + OPT_INTEGER('n',"n", &N, "FFT Points"), + OPT_INTEGER('i',"iter", &iter, "Iterations"), + OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), + OPT_INTEGER('c',"batch", &batch, "Batch"), + OPT_STRING('p', 
"path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); + argc = argparse_parse(&argparse, argc, argv); + + // Print to console the configuration chosen to execute during runtime + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + } + + int isInit = fpga_initialize(platform, path, use_svm); + if(isInit != 0){ + return EXIT_FAILURE; + } + + size_t inp_sz = sizeof(float2) * N * batch; + + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + // find the average of iterations of batched 1D FFTs + // random data every iteration and every batch + for(size_t i = 0; i < iter; i++){ + + status = fftf_create_data(inp, N * batch); + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_1d_batch(N, inp, out, inv, batch); + total_api_time += getTimeinMilliseconds() - temp_timer; + +#ifdef USE_FFTW + if(!verify_fftwf(out, inp, N, 1, inv, batch)){ + fprintf(stderr, "1d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } +#endif + // TODO: Verification of bit reversed output + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; + + 
printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); + } + + // destroy FFT input and output + free(inp); + free(out); + + // destroy data + fpga_final(); + + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/examples/fft1d_svm.c b/examples/fft1d_svm.c index ba020cf..a761f6e 100644 --- a/examples/fft1d_svm.c +++ b/examples/fft1d_svm.c @@ -25,15 +25,15 @@ int main(int argc, const char **argv) { char *path = "fft1d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; - fpga_t timing = {0.0, 0.0, 0.0, 0}; + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; struct argparse_option options[] = { OPT_HELP(), OPT_GROUP("Basic Options"), OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), OPT_INTEGER('c',"batch", &batch, "Batch"), @@ -93,12 +93,20 @@ int main(int argc, const char **argv) { avg_rd += timing.pcie_read_t; avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; printf("Iter: %lu\n", i); printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); printf("\tKernel: %lfms\n", timing.exec_t); printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", 
timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); + } // destroy FFT input and output free(inp); @@ -107,7 +115,7 @@ int main(int argc, const char **argv) { // destroy data fpga_final(); - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/examples/fft2d.c b/examples/fft2d.c index 6483b7e..88e1e66 100644 --- a/examples/fft2d.c +++ b/examples/fft2d.c @@ -27,8 +27,9 @@ int main(int argc, const char **argv) { char *path = "fft2d_emulate.aocx"; const char *platform = "Intel(R) FPGA"; - fpga_t timing = {0.0, 0.0, 0.0, 0}; + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; struct argparse_option options[] = { @@ -111,11 +112,19 @@ int main(int argc, const char **argv) { avg_rd += timing.pcie_read_t; avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; printf("Iter: %lu\n", i); printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); printf("\tKernel: %lfms\n", timing.exec_t); printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); } // iter @@ -127,7 +136,7 @@ int main(int argc, const char **argv) { fpga_final(); // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); - + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, 
avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); + return EXIT_SUCCESS; } diff --git a/examples/fft2d_bram_svm.c b/examples/fft2d_bram_svm.c new file mode 100644 index 0000000..425013d --- /dev/null +++ b/examples/fft2d_bram_svm.c @@ -0,0 +1,131 @@ +// Author: Arjun Ramaswami + +#include <stdio.h> +#include <stdlib.h> // EXIT_FAILURE +#include <math.h> +#include <stdbool.h> + +#include "CL/opencl.h" +#include "fftfpga/fftfpga.h" + +#include "argparse.h" +#include "helper.h" +#include "verify_fftw.h" + +static const char *const usage[] = { + "bin/host [options]", + NULL, +}; + +int main(int argc, const char **argv) { + int N = 64, dim = 2, iter = 1, batch = 1, how_many = 1; + + bool use_bram = true, interleaving = false, sp = true, inv = false; + bool status = true, use_emulator = false; + bool use_svm = true; + + char *path = "fft2d_emulate.aocx"; + const char *platform = "Intel(R) FPGA"; + + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; + double temp_timer = 0.0, total_api_time = 0.0; + + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic Options"), + OPT_INTEGER('n',"n", &N, "FFT Points"), + OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), + OPT_INTEGER('i',"iter", &iter, "Iterations"), + OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), + OPT_INTEGER('m',"how_many", &how_many, "How Many per Call"), + OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); + argc = argparse_parse(&argparse, argc, argv); + + // Print to console the configuration chosen to execute during runtime + print_config(N, dim, iter, inv, sp, how_many, use_bram, interleaving); + +
if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + } + + int isInit = fpga_initialize(platform, path, use_svm); + if(isInit != 0){ + return EXIT_FAILURE; + } + + size_t inp_sz = sizeof(float2) * N * N * how_many; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + for(size_t i = 0; i < iter; i++){ + + status = fftf_create_data(inp, N * N * how_many); + if(!status){ + free(inp); + free(out); + return EXIT_FAILURE; + } + + // use bram for 2d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_2d_bram_svm(N, inp, out, inv, how_many); + total_api_time += getTimeinMilliseconds() - temp_timer; + +#ifdef USE_FFTW + if(!verify_fftwf(out, inp, N, 2, inv, how_many)){ + fprintf(stderr, "2d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } +#endif + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); + + } // iter + + // destroy FFT input and output + free(inp); + free(out); + + // destroy fpga state + fpga_final(); + + // display performance measures + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, how_many, inv, sp); + + return EXIT_SUCCESS; +} diff
--git a/examples/fft3d_bram.c b/examples/fft3d_bram.c index 879b651..c5a3c68 100755 --- a/examples/fft3d_bram.c +++ b/examples/fft3d_bram.c @@ -27,8 +27,9 @@ int main(int argc, const char **argv) { char *path = "fft3d_emulate.aocx"; const char *platform; - fpga_t timing = {0.0, 0.0, 0.0, 0}; + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; struct argparse_option options[] = { @@ -105,12 +106,19 @@ int main(int argc, const char **argv) { avg_rd += timing.pcie_read_t; avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; printf("Iter: %lu\n", i); printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); printf("\tKernel: %lfms\n", timing.exec_t); printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); } // iter // destroy FFT input and output free(inp); @@ -120,7 +128,7 @@ int main(int argc, const char **argv) { fpga_final(); // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } diff --git a/examples/fft3d_ddr.c b/examples/fft3d_ddr.c index dfae683..901b6e1 100755 --- a/examples/fft3d_ddr.c +++ b/examples/fft3d_ddr.c @@ -27,10 +27,10 @@ int main(int argc, const char **argv) { char *path = "fft3d_emulate.aocx"; const char *platform; - fpga_t timing = {0.0, 0.0, 0.0, 0}; + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 
0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; - double data_timer = 0.0; struct argparse_option options[] = { OPT_HELP(), @@ -75,7 +75,7 @@ int main(int argc, const char **argv) { for(size_t i = 0; i < iter; i++){ // create and destroy data every iteration - data_timer = getTimeinMilliseconds(); + double data_timer = getTimeinMilliseconds(); status = fftf_create_data(inp, N * N * N); data_timer = getTimeinMilliseconds() - data_timer; if(!status){ @@ -109,12 +109,20 @@ int main(int argc, const char **argv) { avg_rd += timing.pcie_read_t; avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; printf("Iter: %lu\n", i); printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); printf("\tKernel: %lfms\n", timing.exec_t); printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); + } // iter // destroy FFT input and output free(inp); @@ -124,7 +132,7 @@ int main(int argc, const char **argv) { fpga_final(); // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } diff --git a/examples/fft3d_ddr_batch.c b/examples/fft3d_ddr_batch.c new file mode 100755 index 0000000..a949dfe --- /dev/null +++ b/examples/fft3d_ddr_batch.c @@ -0,0 +1,139 @@ +// Author: Arjun Ramaswami + +#include <stdio.h> +#include <stdlib.h> // EXIT_FAILURE +#include <math.h> +#include <stdbool.h> + +#include "CL/opencl.h" +#include "fftfpga/fftfpga.h" + +#include "argparse.h" +#include "helper.h" +#include "verify_fftw.h" + +static const char *const usage[] = { + "bin/host [options]", + NULL, +};
+ +int main(int argc, const char **argv) { + int N = 64, dim = 3, iter = 1, batch = 1; + + bool inv = false, sp = true; + bool use_bram = false, interleaving = false, use_svm = false; + bool status = true, use_emulator = false; + + char *path = "fft3d_emulate.aocx"; + const char *platform; + + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; + double temp_timer = 0.0, total_api_time = 0.0; + double data_timer = 0.0; + + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic Options"), + OPT_INTEGER('n',"n", &N, "FFT Points"), + OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), + OPT_INTEGER('i',"iter", &iter, "Iterations"), + OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), + OPT_INTEGER('c',"batch", &batch, "Batch"), + OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), + OPT_STRING('p', "path", &path, "Path to bitstream"), + OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), + OPT_END(), + }; + + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); + argc = argparse_parse(&argparse, argc, argv); + + // Print to console the configuration chosen to execute during runtime + print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); + + if(use_emulator){ + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + else{ + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + //platform = "Intel(R) FPGA"; + } + + int isInit = fpga_initialize(platform, path, use_svm); + if(isInit != 0){ + fprintf(stderr, "FPGA initialization error\n"); + return EXIT_FAILURE; + } + + size_t inp_sz = sizeof(float2) * N * N * N * batch; + float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); + float2 
*out = (float2*)fftfpgaf_complex_malloc(inp_sz); + + for(size_t i = 0; i < iter; i++){ + + // create and destroy data every iteration + data_timer = getTimeinMilliseconds(); + status = fftf_create_data(inp, N * N * N * batch); + data_timer = getTimeinMilliseconds() - data_timer; + if(!status){ + fprintf(stderr, "Error in Data Creation \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } + printf("Time to Create Data: %lfsec for %uMB\n", data_timer * 1e-3, (unsigned)(inp_sz / (1024 * 1024))); + + // use ddr for 3d Transpose + temp_timer = getTimeinMilliseconds(); + timing = fftfpgaf_c2c_3d_ddr_batch(N, inp, out, inv, interleaving, batch); + total_api_time += getTimeinMilliseconds() - temp_timer; + +#ifdef USE_FFTW + if(!verify_fftwf(out, inp, N, 3, inv, batch)){ + fprintf(stderr, "3d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } +#endif + if(timing.valid == 0){ + fprintf(stderr, "Invalid execution, timing found to be 0"); + free(inp); + free(out); + return EXIT_FAILURE; + } + + avg_rd += timing.pcie_read_t; + avg_wr += timing.pcie_write_t; + avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); + + } // iter + // destroy FFT input and output + free(inp); + free(out); + + // destroy fpga state + fpga_final(); + + // display performance measures + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); + + return EXIT_SUCCESS; +} diff --git a/examples/fft3d_ddr_svm.c b/examples/fft3d_ddr_svm.c index
3310682..cf449d8 100755 --- a/examples/fft3d_ddr_svm.c +++ b/examples/fft3d_ddr_svm.c @@ -27,8 +27,9 @@ int main(int argc, const char **argv) { char *path = "fft3d_emulate.aocx"; const char *platform; - fpga_t timing = {0.0, 0.0, 0.0, 0}; + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; struct argparse_option options[] = { @@ -73,17 +74,21 @@ int main(int argc, const char **argv) { float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); for(size_t i = 0; i < iter; i++){ + + double data_timer = getTimeinMilliseconds(); status = fftf_create_data(inp, N * N * N); + data_timer = getTimeinMilliseconds() - data_timer; if(!status){ fprintf(stderr, "Error in Data Creation \n"); free(inp); free(out); return EXIT_FAILURE; } + printf("Time to Create Data: %lfsec for %uMB\n", data_timer * 1e-3, (N*N*N* 8 / (1024 * 1024))); // use ddr for 3d Transpose temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr_svm(N, inp, out, inv); + timing = fftfpgaf_c2c_3d_ddr_svm(N, inp, out, inv, interleaving); total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW @@ -104,11 +109,19 @@ int main(int argc, const char **argv) { avg_rd += timing.pcie_read_t; avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; printf("Iter: %lu\n", i); printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); printf("\tKernel: %lfms\n", timing.exec_t); printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); } // iter // destroy FFT input and output @@ -119,7 +132,7 @@ int main(int argc, const char **argv) { fpga_final(); 
// display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } diff --git a/examples/fft3d_ddr_svm_batch.c b/examples/fft3d_ddr_svm_batch.c index e93ef1c..c335453 100755 --- a/examples/fft3d_ddr_svm_batch.c +++ b/examples/fft3d_ddr_svm_batch.c @@ -26,10 +26,9 @@ int main(int argc, const char **argv) { char *path = "fft3d_emulate.aocx"; const char *platform; - fpga_t timing = {0.0, 0.0, 0.0, 0}; - + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; struct argparse_option options[] = { OPT_HELP(), @@ -68,6 +67,7 @@ int main(int argc, const char **argv) { return EXIT_FAILURE; } + double total_api_time = 0.0; // create and destroy data every iteration size_t inp_sz = sizeof(float2) * N * N * N * batch; float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); @@ -84,7 +84,7 @@ int main(int argc, const char **argv) { } // use ddr for 3d Transpose - temp_timer = getTimeinMilliseconds(); + double temp_timer = getTimeinMilliseconds(); timing = fftfpgaf_c2c_3d_ddr_svm_batch(N, inp, out, inv, batch); total_api_time += getTimeinMilliseconds() - temp_timer; @@ -106,6 +106,19 @@ int main(int argc, const char **argv) { avg_rd += timing.pcie_read_t; avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; + + printf("Iter: %lu\n", i); + printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); + printf("\tKernel: %lfms\n", timing.exec_t); + printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", 
timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); } // iter @@ -117,7 +130,7 @@ int main(int argc, const char **argv) { fpga_final(); // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); return EXIT_SUCCESS; } diff --git a/examples/fft3d_svm.c b/examples/fft3d_svm.c index b04ad30..89d7696 100644 --- a/examples/fft3d_svm.c +++ b/examples/fft3d_svm.c @@ -27,8 +27,9 @@ int main(int argc, const char **argv) { char *path = "fft3d_emulate.aocx"; const char *platform; - fpga_t timing = {0.0, 0.0, 0.0, 0}; + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; + double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; struct argparse_option options[] = { @@ -108,11 +109,19 @@ int main(int argc, const char **argv) { avg_rd += timing.pcie_read_t; avg_wr += timing.pcie_write_t; avg_exec += timing.exec_t; + avg_hw_rd += timing.hw_pcie_read_t; + avg_hw_wr += timing.hw_pcie_write_t; + avg_hw_exec += timing.hw_exec_t; printf("Iter: %lu\n", i); printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); printf("\tKernel: %lfms\n", timing.exec_t); printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); + + printf("Hw Counters: \n"); + printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); + printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); + printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); } // iter @@ -125,7 +134,7 @@ int main(int argc, const char **argv) { fpga_final(); // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, N, dim, iter, batch, inv, sp); - + display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); + return EXIT_SUCCESS; } diff --git 
a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index fd14810..974ac98 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -17,10 +17,14 @@ message("-- FFT size is ${FFT_SIZE}") math(EXPR DEPTH "1 << (${LOG_FFT_SIZE} + ${LOG_FFT_SIZE} - ${LOG_POINTS})") #set(BUF_LOC "DDR") -set(BUFFER_LOCATION "DDR" CACHE STRING "Buffer location of 3d Transpose") -set_property(CACHE BUFFER_LOCATION PROPERTY STRINGS "DDR" "device") +set(DDR_BUFFER_LOCATION "DDR" CACHE STRING "Buffer location of 3d Transpose") +set_property(CACHE DDR_BUFFER_LOCATION PROPERTY STRINGS "DDR" "device") -message("-- Buffer location for 3d Transpose is ${BUFFER_LOCATION}") +set(SVM_HOST_BUFFER_LOCATION "" CACHE STRING "SVM host buffer location") +set_property(CACHE SVM_HOST_BUFFER_LOCATION PROPERTY STRINGS "" "host") + +message("-- Buffer location for 3d Transpose is ${DDR_BUFFER_LOCATION}") +message("-- SVM host Buffer location ${SVM_HOST_BUFFER_LOCATION}") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/common/fft_config.h.in" @@ -41,7 +45,7 @@ if(NOT DEFINED FPGA_BOARD_NAME) endif() ## Flags for different target options -set(AOC_FLAGS "-g -v -fp-relaxed -cl-single-precision-constant -no-interleaving=default" CACHE STRING "AOC compiler flags") +set(AOC_FLAGS "-g -v -fp-relaxed -cl-single-precision-constant -board-package=/cm/shared/opt/intel_oneapi/beta-10/intelfpgadpcpp/2021.1-beta10/board/intel_s10sx_pac_usm/ -no-interleaving=default" CACHE STRING "AOC compiler flags") separate_arguments(AOC_FLAGS) set(EMU_FLAGS "-legacy-emulator -march=emulator" CACHE STRING "AOC emulation flags") separate_arguments(EMU_FLAGS) diff --git a/kernels/common/fft_config.h.in b/kernels/common/fft_config.h.in index ae30dc5..1955739 100755 --- a/kernels/common/fft_config.h.in +++ b/kernels/common/fft_config.h.in @@ -11,7 +11,8 @@ #define DEPTH @DEPTH@ -#define BUFFER_LOCATION "@BUFFER_LOCATION@" +#define DDR_BUFFER_LOCATION "@DDR_BUFFER_LOCATION@" +#define SVM_HOST_BUFFER_LOCATION "@SVM_HOST_BUFFER_LOCATION@" #endif
// FFT_CONFIG_H diff --git a/kernels/fft1d/fft1d.cl b/kernels/fft1d/fft1d.cl index e55ee1f..250e9ae 100644 --- a/kernels/fft1d/fft1d.cl +++ b/kernels/fft1d/fft1d.cl @@ -138,7 +138,8 @@ uint permute_gid (uint gid) { // group dimension (N/(8*CONT_FACTOR), num_iterations) __attribute__((reqd_work_group_size(CONT_FACTOR * POINTS, 1, 1))) -kernel void fetch(global float2 * restrict src) { +kernel +void fetch(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict src) { // Each thread will fetch POINTS points. Need POINTS times to pass to FFT. const int BUF_SIZE = 1 << (LOG_CONT_FACTOR + LOGPOINTS + LOGPOINTS); @@ -179,8 +180,8 @@ kernel void fetch(global float2 * restrict src) { * 'inverse' toggles between the direct and the inverse transform */ -kernel void fft1d(global float2 * restrict dest, - int count, int inverse) { +kernel +void fft1d(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict dest, int count, int inverse) { /* The FFT engine requires a sliding window array for data reordering; data * stored in this array is carried across loop iterations and shifted by one diff --git a/kernels/fft2d/fft2d_bram_opt.cl b/kernels/fft2d/fft2d_bram_opt.cl index f37a465..d35d88e 100644 --- a/kernels/fft2d/fft2d_bram_opt.cl +++ b/kernels/fft2d/fft2d_bram_opt.cl @@ -11,19 +11,19 @@ channel float2 chaninfft2db[POINTS] __attribute__((depth(POINTS))); channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); channel float2 chaninTransStore[POINTS] __attribute__((depth(POINTS))); -kernel void fetchBitrev(global volatile float2 * restrict src, int batch) { +kernel void fetchBitrev(global volatile float2 * restrict src, int how_many) { unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bitrevA = false; float2 __attribute__((memory, numbanks(8))) buf[2][N]; // additional iterations to fill the buffers - for(unsigned step = 0; step < (batch * DEPTH) + delay; step++){ + for(unsigned 
step = 0; step < (how_many * DEPTH) + delay; step++){ unsigned where = (step & ((N * DEPTH) - 1)) * 8; float2x8 data; - if (step < (batch * DEPTH)) { + if (step < (how_many * DEPTH)) { data.i0 = src[where + 0]; data.i1 = src[where + 1]; data.i2 = src[where + 2]; @@ -58,7 +58,7 @@ kernel void fetchBitrev(global volatile float2 * restrict src, int batch) { } } -kernel void fft2da(int inverse, int batch) { +kernel void fft2da(int inverse, int how_many) { /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -70,7 +70,7 @@ kernel void fft2da(int inverse, int batch) { // needs to run "N / 8 - 1" additional iterations to drain the last outputs #pragma loop_coalesce - for(unsigned j = 0; j < batch; j++){ + for(unsigned j = 0; j < how_many; j++){ for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { float2x8 data; @@ -108,23 +108,21 @@ kernel void fft2da(int inverse, int batch) { } } -kernel void transpose(int batch) { +kernel void transpose(int how_many) { const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; float2 buf[2][DEPTH][POINTS]; - //float2 bitrev_in[2][N], bitrev_out[2][N]; - //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; float2 bitrev_in[2][N]; float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; int initial_delay = DELAY + DELAY; // for each of the bitrev buffer // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((batch * DEPTH) + DEPTH); step++){ + for(int step = -initial_delay; step < ((how_many * DEPTH) + DEPTH); step++){ float2x8 data, data_out; - if (step < ((batch * DEPTH) - initial_delay)) { + if (step < ((how_many * DEPTH) - initial_delay)) { data.i0 = read_channel_intel(chaninTranspose[0]); data.i1 = read_channel_intel(chaninTranspose[1]); data.i2 = read_channel_intel(chaninTranspose[2]); @@ -178,7 +176,7 @@ kernel void transpose(int batch) 
{ } } -kernel void fft2db(int inverse, int batch) { +kernel void fft2db(int inverse, int how_many) { /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -226,21 +224,20 @@ kernel void fft2db(int inverse, int batch) { } } -kernel void transposeStore(global volatile float2 * restrict dest, int batch) { +kernel void transposeStore(global volatile float2 * restrict dest, int how_many) { const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; float2 buf[2][DEPTH][POINTS]; - //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; float2 bitrev_in[2][N]; int initial_delay = DELAY; // for each of the bitrev buffer // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((batch * DEPTH) + DEPTH); step++){ + for(int step = -initial_delay; step < ((how_many * DEPTH) + DEPTH); step++){ float2x8 data, data_out; - if (step < ((batch * DEPTH) - initial_delay)) { + if (step < ((how_many * DEPTH) - initial_delay)) { data.i0 = read_channel_intel(chaninTransStore[0]); data.i1 = read_channel_intel(chaninTransStore[1]); data.i2 = read_channel_intel(chaninTransStore[2]); diff --git a/kernels/fft3d/fft3d_ddr.cl b/kernels/fft3d/fft3d_ddr.cl index 60f6c7e..9a35570 100755 --- a/kernels/fft3d/fft3d_ddr.cl +++ b/kernels/fft3d/fft3d_ddr.cl @@ -15,7 +15,7 @@ channel float2 chaninTranStore1[POINTS] __attribute__((depth(POINTS))); channel float2 chaninTranStore2[POINTS] __attribute__((depth(POINTS))); // Kernel that fetches data from global memory -kernel void fetchBitrev1(global volatile float2 * restrict src) { +kernel void fetchBitrev1(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict src) { unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bitrevA = false; @@ -230,7 +230,7 @@ kernel void fft3db(int inverse) { } __attribute__((max_global_work_dim(0))) -kernel 
void transposeStore1(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict dest) { +kernel void transposeStore1(__global __attribute__((buffer_location(DDR_BUFFER_LOCATION))) volatile float2 * restrict dest) { const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; @@ -293,7 +293,7 @@ kernel void transposeStore1(__global __attribute__((buffer_location(BUFFER_LOCAT } } __attribute__((max_global_work_dim(0))) -kernel void fetchBitrev2(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict src) { +kernel void fetchBitrev2(__global __attribute__((buffer_location(DDR_BUFFER_LOCATION))) volatile float2 * restrict src) { unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; @@ -417,7 +417,7 @@ kernel void fft3dc(int inverse) { } __attribute__((max_global_work_dim(0))) -kernel void transposeStore2(global float2 * restrict dest) { +kernel void transposeStore2(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict dest) { const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; diff --git a/kernels/fft3d/fft3d_ddr_svm.cl b/kernels/fft3d/fft3d_ddr_svm.cl new file mode 100755 index 0000000..2059dce --- /dev/null +++ b/kernels/fft3d/fft3d_ddr_svm.cl @@ -0,0 +1,499 @@ +// Author: Arjun Ramaswami + +#include "fft_config.h" +#include "fft_8.cl" +#include "../matrixTranspose/diagonal_bitrev.cl" + +#pragma OPENCL EXTENSION cl_intel_channels : enable + +channel float2 chaninfft3da[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft3db[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft3dc[POINTS] __attribute__((depth(POINTS))); + +channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranStore1[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranStore2[POINTS] 
__attribute__((depth(POINTS))); + +// Kernel that fetches data from global memory +kernel void fetchBitrev1(__global __attribute__((buffer_location("host"))) volatile float2 * restrict src) { + unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bitrevA = false; + + float2 __attribute__((memory, numbanks(8))) buf[2][N]; + + // additional iterations to fill the buffers + for(unsigned step = 0; step < (N * DEPTH) + delay; step++){ + + unsigned where = (step & ((N * DEPTH) - 1)) * 8; + + float2x8 data; + if (step < (N * DEPTH)) { + data.i0 = src[where + 0]; + data.i1 = src[where + 1]; + data.i2 = src[where + 2]; + data.i3 = src[where + 3]; + data.i4 = src[where + 4]; + data.i5 = src[where + 5]; + data.i6 = src[where + 6]; + data.i7 = src[where + 7]; + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_fetch(data, + is_bitrevA ? buf[0] : buf[1], + is_bitrevA ? 
buf[1] : buf[0], + row); + + if (step >= delay) { + write_channel_intel(chaninfft3da[0], data.i0); + write_channel_intel(chaninfft3da[1], data.i1); + write_channel_intel(chaninfft3da[2], data.i2); + write_channel_intel(chaninfft3da[3], data.i3); + write_channel_intel(chaninfft3da[4], data.i4); + write_channel_intel(chaninfft3da[5], data.i5); + write_channel_intel(chaninfft3da[6], data.i6); + write_channel_intel(chaninfft3da[7], data.i7); + } + } +} + +/* This single work-item task wraps the FFT engine + * 'inverse' toggles between the direct and the inverse transform + */ +kernel void fft3da(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3da[0]); + data.i1 = read_channel_intel(chaninfft3da[1]); + data.i2 = read_channel_intel(chaninfft3da[2]); + data.i3 = read_channel_intel(chaninfft3da[3]); + data.i4 = read_channel_intel(chaninfft3da[4]); + data.i5 = read_channel_intel(chaninfft3da[5]); + data.i6 = read_channel_intel(chaninfft3da[6]); + data.i7 = read_channel_intel(chaninfft3da[7]); + } + else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose[0], data.i0); + write_channel_intel(chaninTranspose[1], data.i1); + write_channel_intel(chaninTranspose[2], data.i2); + write_channel_intel(chaninTranspose[3], data.i3); + 
write_channel_intel(chaninTranspose[4], data.i4); + write_channel_intel(chaninTranspose[5], data.i5); + write_channel_intel(chaninTranspose[6], data.i6); + write_channel_intel(chaninTranspose[7], data.i7); + } + } + } +} + +__attribute__((max_global_work_dim(0))) +kernel void transpose() { + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + //float2 bitrev_in[2][N], bitrev_out[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + + int initial_delay = DELAY + DELAY; // for each of the bitrev buffer + + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose[0]); + data.i1 = read_channel_intel(chaninTranspose[1]); + data.i2 = read_channel_intel(chaninTranspose[2]); + data.i3 = read_channel_intel(chaninTranspose[3]); + data.i4 = read_channel_intel(chaninTranspose[4]); + data.i5 = read_channel_intel(chaninTranspose[5]); + data.i6 = read_channel_intel(chaninTranspose[6]); + data.i7 = read_channel_intel(chaninTranspose[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, DELAY); + + data_out = readBuf( + is_bufA ? 
buf[1] : buf[0], + step); + + unsigned start_row = (step + DELAY) & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + + if (step >= (DEPTH)) { + write_channel_intel(chaninfft3db[0], data_out.i0); + write_channel_intel(chaninfft3db[1], data_out.i1); + write_channel_intel(chaninfft3db[2], data_out.i2); + write_channel_intel(chaninfft3db[3], data_out.i3); + write_channel_intel(chaninfft3db[4], data_out.i4); + write_channel_intel(chaninfft3db[5], data_out.i5); + write_channel_intel(chaninfft3db[6], data_out.i6); + write_channel_intel(chaninfft3db[7], data_out.i7); + } + } +} + +kernel void fft3db(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3db[0]); + data.i1 = read_channel_intel(chaninfft3db[1]); + data.i2 = read_channel_intel(chaninfft3db[2]); + data.i3 = read_channel_intel(chaninfft3db[3]); + data.i4 = read_channel_intel(chaninfft3db[4]); + data.i5 = read_channel_intel(chaninfft3db[5]); + data.i6 = read_channel_intel(chaninfft3db[6]); + data.i7 = read_channel_intel(chaninfft3db[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranStore1[0], data.i0); + write_channel_intel(chaninTranStore1[1], data.i1); + 
write_channel_intel(chaninTranStore1[2], data.i2); + write_channel_intel(chaninTranStore1[3], data.i3); + write_channel_intel(chaninTranStore1[4], data.i4); + write_channel_intel(chaninTranStore1[5], data.i5); + write_channel_intel(chaninTranStore1[6], data.i6); + write_channel_intel(chaninTranStore1[7], data.i7); + } + } + } +} + +__attribute__((max_global_work_dim(0))) +kernel void transposeStore1(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict dest) { + + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranStore1[0]); + data.i1 = read_channel_intel(chaninTranStore1[1]); + data.i2 = read_channel_intel(chaninTranStore1[2]); + data.i3 = read_channel_intel(chaninTranStore1[3]); + data.i4 = read_channel_intel(chaninTranStore1[4]); + data.i5 = read_channel_intel(chaninTranStore1[5]); + data.i6 = read_channel_intel(chaninTranStore1[6]); + data.i7 = read_channel_intel(chaninTranStore1[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? 
buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned index = (step - DEPTH) * 8; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; + } + } +} +__attribute__((max_global_work_dim(0))) +kernel void fetchBitrev2(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict src) { + unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 + + bool is_bufA = false, is_bitrevA = false; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + float2 buf[2][DEPTH][POINTS]; + + // additional iterations to fill the buffers + for(unsigned step = 0; step < (N * DEPTH) + DEPTH + delay; step++){ + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned start_index = step + delay; + unsigned zdim = (step >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (step >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // increment by 8 until N / 8 + unsigned xdim = (step * 8) & (N - 1); + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (step >> (LOGN + LOGN + LOGN - LOGPOINTS)); + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + float2x8 data, data_out; + if (step < (N * DEPTH)) { + data.i0 = src[index + 0]; + data.i1 = src[index + 1]; + data.i2 = src[index + 2]; + data.i3 = src[index + 3]; + data.i4 = src[index + 4]; + data.i5 = src[index + 5]; + data.i6 = src[index + 6]; + data.i7 = src[index + 7]; + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? 
!is_bitrevA: is_bitrevA; + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_fetch( + is_bufA ? buf[1] : buf[0], + step, 0); + + unsigned start_row = step & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + if (step >= (DEPTH + delay)) { + + write_channel_intel(chaninfft3dc[0], data_out.i0); + write_channel_intel(chaninfft3dc[1], data_out.i1); + write_channel_intel(chaninfft3dc[2], data_out.i2); + write_channel_intel(chaninfft3dc[3], data_out.i3); + write_channel_intel(chaninfft3dc[4], data_out.i4); + write_channel_intel(chaninfft3dc[5], data_out.i5); + write_channel_intel(chaninfft3dc[6], data_out.i6); + write_channel_intel(chaninfft3dc[7], data_out.i7); + } + } +} + +/* + * Input and output data in bit-reversed format + */ +kernel void fft3dc(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3dc[0]); + data.i1 = read_channel_intel(chaninfft3dc[1]); + data.i2 = read_channel_intel(chaninfft3dc[2]); + data.i3 = read_channel_intel(chaninfft3dc[3]); + data.i4 = read_channel_intel(chaninfft3dc[4]); + data.i5 = read_channel_intel(chaninfft3dc[5]); + data.i6 = read_channel_intel(chaninfft3dc[6]); + data.i7 = read_channel_intel(chaninfft3dc[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Perform one FFT step + data = fft_step(data, i % 
(N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranStore2[0], data.i0); + write_channel_intel(chaninTranStore2[1], data.i1); + write_channel_intel(chaninTranStore2[2], data.i2); + write_channel_intel(chaninTranStore2[3], data.i3); + write_channel_intel(chaninTranStore2[4], data.i4); + write_channel_intel(chaninTranStore2[5], data.i5); + write_channel_intel(chaninTranStore2[6], data.i6); + write_channel_intel(chaninTranStore2[7], data.i7); + } + } + } +} + +__attribute__((max_global_work_dim(0))) +kernel void transposeStore2(__global __attribute__((buffer_location("host"))) float2 * restrict dest) { + + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 bitrev_in[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranStore2[0]); + data.i1 = read_channel_intel(chaninTranStore2[1]); + data.i2 = read_channel_intel(chaninTranStore2[2]); + data.i3 = read_channel_intel(chaninTranStore2[3]); + data.i4 = read_channel_intel(chaninTranStore2[4]); + data.i5 = read_channel_intel(chaninTranStore2[5]); + data.i6 = read_channel_intel(chaninTranStore2[6]); + data.i7 = read_channel_intel(chaninTranStore2[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? 
!is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned start_index = (step - DEPTH); + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned zdim = (start_index >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (start_index >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // incremenet by 8 until N / 8 + unsigned xdim = (start_index * 8) & ( N - 1); + //unsigned index = (step - DEPTH) * 8; + + // increment by N*N*N + unsigned cube = LOGN + LOGN + LOGN - LOGPOINTS; + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (start_index >> cube); + //unsigned batch_index = 0; + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; + } + } +} \ No newline at end of file diff --git a/tests/test_fft1d_fpga.cpp b/tests/test_fft1d_fpga.cpp index 0d991ae..f5beed1 100644 --- a/tests/test_fft1d_fpga.cpp +++ b/tests/test_fft1d_fpga.cpp @@ -1,13 +1,15 @@ // Author: Arjun Ramaswami #include "gtest/gtest.h" // finds this because gtest is linked +#include +#include +#include extern "C" { #include "CL/opencl.h" #include "fftfpga/fftfpga.h" #include "helper.h" - #include - #include + #include "verify_fftw.h" #ifdef USE_FFTW #include @@ -25,15 +27,15 @@ TEST(fft1dFPGATest, InputValidity){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; // null inp ptr input - fft_time = fftfpgaf_c2c_1d(64, NULL, test, 0, 1); + fft_time = fftfpgaf_c2c_1d(N, 
NULL, test, false, 1); EXPECT_EQ(fft_time.valid, 0); // null out ptr input - fft_time = fftfpgaf_c2c_1d(64, test, NULL, 0, 1); + fft_time = fftfpgaf_c2c_1d(N, test, NULL, false, 1); EXPECT_EQ(fft_time.valid, 0); // if N not a power of 2 - fft_time = fftfpgaf_c2c_1d(63, test, test, 0, 1); + fft_time = fftfpgaf_c2c_1d(N-1, test, test, false, 1); EXPECT_EQ(fft_time.valid, 0); free(test); @@ -57,57 +59,46 @@ TEST(fft1dFPGATest, CorrectnessSp){ fpga_t fft_time = fftfpgaf_c2c_1d(64, inp, out, 0, 1); - fftwf_complex* fftw_inp = (fftwf_complex*)fftwf_alloc_complex(sz); - fftwf_complex* fftw_out = (fftwf_complex*)fftwf_alloc_complex(sz); - fftwf_plan plan = fftwf_plan_dft_1d( N, &fftw_inp[0], &fftw_out[0], FFTW_FORWARD, FFTW_ESTIMATE); - - float2 *temp = (float2 *)fftfpgaf_complex_malloc(sz); - - for (int i = 0; i < N; i++){ - temp[i] = out[i]; - } - for (int i = 0; i < N; i++) { - int fwd = i; - int bit_rev = 0; - for (int j = 0; j < logN; j++) { - bit_rev <<= 1; - bit_rev |= fwd & 1; - fwd >>= 1; - } - out[i] = temp[bit_rev]; - } - - for(int i = 0; i < N; i++){ - fftw_inp[i][0] = inp[i].x; - fftw_inp[i][1] = inp[i].y; - } - - fftwf_execute(plan); - - double mag_sum = 0, noise_sum = 0; - - for (int i = 0; i < N; i++) { - double magnitude = fftw_out[i][0] * fftw_out[i][0] + \ - fftw_out[i][1] * fftw_out[i][1]; - double noise = (fftw_out[i][0] - out[i].x) \ - * (fftw_out[i][0] - out[i].x) + - (fftw_out[i][1] - out[i].y) * (fftw_out[i][1] - out[i].y); - - mag_sum += magnitude; - noise_sum += noise; - } - double db = 10 * log(mag_sum / noise_sum) / log(10.0); - ASSERT_GT(db, 120); - EXPECT_GT(fft_time.exec_t, 0.0); - EXPECT_EQ(fft_time.valid, 1); - - fftwf_free(fftw_inp); - fftwf_free(fftw_out); - fftwf_destroy_plan(plan); - free(temp); - fpga_final(); + bool result = verify_fftwf(out, inp, N, 1, false, 1); + EXPECT_TRUE(result); free(inp); free(out); + + fpga_final(); #endif +} + +/** + * \brief fftfpgaf_c2c_1d() + */ +TEST(fft1dFPGATest, InputValiditySVM){ + const int N = (1 
<< 6); + + size_t sz = sizeof(float2) * N; + float2 *test = (float2*)malloc(sz); + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; + + // svm not enabled + fft_time = fftfpgaf_c2c_1d_svm(N, test, test, false, 1); + EXPECT_EQ(fft_time.valid, 0); + + int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft1d/ff1d.aocx", true); + ASSERT_EQ(isInit, 0); + + // null inp ptr input + fft_time = fftfpgaf_c2c_1d_svm(N, NULL, test, false, 1); + EXPECT_EQ(fft_time.valid, 0); + + // null out ptr input + fft_time = fftfpgaf_c2c_1d_svm(N, test, NULL, false, 1); + EXPECT_EQ(fft_time.valid, 0); + + // if N not a power of 2 + fft_time = fftfpgaf_c2c_1d_svm(N-1, test, test, false, 1); + EXPECT_EQ(fft_time.valid, 0); + + free(test); + + fpga_final(); } \ No newline at end of file From dbf7c709c0cc371b528dbf493b029245dbc14f8e Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 12 Jan 2021 10:18:08 +0100 Subject: [PATCH 37/76] Reordered host kernel calls, removed channel depth --- api/src/fft3d.c | 337 ++++++++++++++++++----------------- kernels/CMakeLists.txt | 9 +- kernels/fft3d/CMakeLists.txt | 3 +- kernels/fft3d/fft3d_bram.cl | 12 +- kernels/fft3d/fft3d_ddr.cl | 12 +- 5 files changed, 195 insertions(+), 178 deletions(-) diff --git a/api/src/fft3d.c b/api/src/fft3d.c index 772fa53..b9124e8 100644 --- a/api/src/fft3d.c +++ b/api/src/fft3d.c @@ -121,26 +121,26 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, boo cl_event startExec_event, endExec_event; fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); - checkError(status, "Failed to launch fetch kernel"); + status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); + checkError(status, "Failed to launch store transpose kernel"); - status = clEnqueueTask(queue2, fft3da_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); + status = clEnqueueTask(queue6, fft3dc_kernel, 0, NULL, NULL); + checkError(status, 
"Failed to launch third fft kernel"); - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue5, transpose3d_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); status = clEnqueueTask(queue4, fft3db_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, transpose3d_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue6, fft3dc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch third fft kernel"); + status = clEnqueueTask(queue2, fft3da_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); - checkError(status, "Failed to launch store transpose kernel"); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); + checkError(status, "Failed to launch fetch kernel"); // Wait for all command queues to complete pending events status = clFinish(queue1); @@ -211,7 +211,7 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, boo } /** - * \brief compute an out-of-place single precision complex 3D FFT using the DDR for 3D Transpose where the data access between the host and the FPGA is using Shared Virtual Memory (SVM) + * \brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose * \param N : integer pointer addressing the size of FFT3d * \param inp : float2 pointer to input data of size [N * N * N] * \param out : float2 pointer to output data of size [N * N * N] @@ -219,16 +219,20 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, boo * \param interleaving : 1 if 
using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, bool interleaving) { +fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ return fft_time; } +#ifdef VERBOSE + printf("Launching%s 3d FFT transform in DDR \n", inv ? " inverse":""); +#endif + // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -255,55 +259,36 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, queue_setup(); // Device memory buffers - cl_mem d_inOutData; - if(!interleaving){ - d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - } - else{ - d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - } + cl_mem d_inData, d_transpose, d_outData; + d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate input device buffer\n"); - // allocate SVM buffers - float2 *h_inData, *h_outData; - h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); - h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + d_transpose = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); - double 
svmBufferCopyIn_timer = getTimeinMilliSec(); - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); + d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); - // copy data into h_inData - size_t num_bytes = num_pts * sizeof(float2); - memcpy(h_inData, inp, num_bytes); - /* - for(size_t i = 0; i < num_pts; i++){ - h_inData[i].x = inp[i].x; - h_inData[i].y = inp[i].y; - } - */ + // Copy data from host to device + cl_event writeBuf_event; + fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); + status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, &writeBuf_event); - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); + status = clFinish(queue1); + checkError(status, "failed to finish"); - // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_outData[i].x = 0.0; - h_outData[i].y = 0.0; - } + fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; + checkError(status, "Failed to copy data to device"); - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); + cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; - svmBufferCopyIn_timer = getTimeinMilliSec() - svmBufferCopyIn_timer; - printf("\nSVM Buffer Copy In Time: %lfms\n", svmBufferCopyIn_timer); + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); + clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, 
NULL); - // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); + fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + + status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); checkError(status, "Failed to set fetch1 kernel arg"); status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); @@ -311,47 +296,50 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftb kernel arg"); - // kernel stores to DDR memory - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); checkError(status, "Failed to set store1 kernel arg"); - // kernel fetches from DDR memory - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); checkError(status, "Failed to set fetch2 kernel arg"); status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftc kernel arg"); - - // kernel stores using SVM based PCIe to host - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void*)h_outData); + status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); checkError(status, "Failed to set store2 kernel arg"); + // Kernel Execution cl_event startExec_event, endExec_event; fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, &startExec_event); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + status = 
clEnqueueTask(queue7, store2_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); + status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second transpose kernel"); + // enqueue fetch to same queue as the store kernel due to data dependency + // therefore, not swapped status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, &endExec_event); + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, &startExec_event); + checkError(status, "Failed to launch fetch kernel"); + + status = clFinish(queue7); + checkError(status, "failed to finish"); + status = clFinish(queue6); + checkError(status, "failed to finish"); status = clFinish(queue5); checkError(status, "failed to finish"); status = clFinish(queue4); @@ -364,42 +352,39 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, checkError(status, "failed to finish"); fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - + cl_ulong kernel_start = 0, kernel_end = 0; clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); 
clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); - - double svmBufferCopyout_timer = getTimeinMilliSec(); - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, - (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map out data"); + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); - memcpy(out, h_outData, num_bytes); - /* - for(size_t i = 0; i < num_pts; i++){ - out[i].x = h_outData[i].x; - out[i].y = h_outData[i].y; - } - */ + // Copy results from device to host + cl_event readBuf_event; + fft_time.pcie_read_t = getTimeinMilliSec(); + status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, &readBuf_event); + + status = clFinish(queue1); + checkError(status, "failed to finish reading DDR using PCIe"); - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap out data"); + fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; + checkError(status, "Failed to copy data from device"); - svmBufferCopyout_timer = getTimeinMilliSec() - svmBufferCopyout_timer; - printf("SVM Buffer Copy Out Time: %lfms\n\n", svmBufferCopyout_timer); + cl_ulong readBuf_start = 0, readBuf_end = 0; + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); + clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); - if (h_inData) - clSVMFree(context, h_inData); - if (h_outData) - clSVMFree(context, h_outData); + fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); queue_cleanup(); - if (d_inOutData) - clReleaseMemObject(d_inOutData); + if (d_inData) + clReleaseMemObject(d_inData); + if (d_outData) + clReleaseMemObject(d_outData); + if 
(d_transpose) + clReleaseMemObject(d_transpose); if(fetch1_kernel) clReleaseKernel(fetch1_kernel); @@ -426,7 +411,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, } /** - * \brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose + * \brief compute an out-of-place single precision complex 3D FFT using the DDR for 3D Transpose where the data access between the host and the FPGA is using Shared Virtual Memory (SVM) * \param N : integer pointer addressing the size of FFT3d * \param inp : float2 pointer to input data of size [N * N * N] * \param out : float2 pointer to output data of size [N * N * N] @@ -434,20 +419,16 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, * \param interleaving : 1 if using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { +fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, bool interleaving) { fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ return fft_time; } -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); -#endif - // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -474,36 +455,55 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { queue_setup(); // Device memory buffers - cl_mem d_inData, d_transpose, d_outData; - d_inData = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_transpose = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); + cl_mem d_inOutData; + if(!interleaving){ + d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + } + else{ + d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + } - d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); + // allocate SVM buffers + float2 *h_inData, *h_outData; + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - // Copy data from host to device - cl_event writeBuf_event; - fft_time.pcie_write_t = getTimeinMilliSec(); + double svmBufferCopyIn_timer = getTimeinMilliSec(); + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, &writeBuf_event); + // copy data into 
h_inData + size_t num_bytes = num_pts * sizeof(float2); + memcpy(h_inData, inp, num_bytes); + /* + for(size_t i = 0; i < num_pts; i++){ + h_inData[i].x = inp[i].x; + h_inData[i].y = inp[i].y; + } + */ - status = clFinish(queue1); - checkError(status, "failed to finish"); + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); - cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; + // copy data into h_inData + for(size_t i = 0; i < num_pts; i++){ + h_outData[i].x = 0.0; + h_outData[i].y = 0.0; + } - clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); - clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); - fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + svmBufferCopyIn_timer = getTimeinMilliSec() - svmBufferCopyIn_timer; + printf("\nSVM Buffer Copy In Time: %lfms\n", svmBufferCopyIn_timer); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); checkError(status, "Failed to set fetch1 kernel arg"); status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); @@ -511,45 +511,51 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftb kernel 
arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); + // kernel stores to DDR memory + status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); checkError(status, "Failed to set store1 kernel arg"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); + // kernel fetches from DDR memory + status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); checkError(status, "Failed to set fetch2 kernel arg"); status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftc kernel arg"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + + // kernel stores using SVM based PCIe to host + status = clSetKernelArgSVMPointer(store2_kernel, 0, (void*)h_outData); checkError(status, "Failed to set store2 kernel arg"); - // Kernel Execution cl_event startExec_event, endExec_event; fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, &startExec_event); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue7, store2_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); + status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second transpose kernel"); - // enqueue fetch to same queue as the store kernel due to data dependency status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); - 
status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, &endExec_event); + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, &startExec_event); + checkError(status, "Failed to launch fetch kernel"); + + status = clFinish(queue7); + checkError(status, "failed to finish"); + status = clFinish(queue6); + checkError(status, "failed to finish"); status = clFinish(queue5); checkError(status, "failed to finish"); status = clFinish(queue4); @@ -562,39 +568,42 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { checkError(status, "failed to finish"); fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - + cl_ulong kernel_start = 0, kernel_end = 0; clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); - // Copy results from device to host - cl_event readBuf_event; - fft_time.pcie_read_t = getTimeinMilliSec(); - status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, &readBuf_event); - - status = clFinish(queue1); - checkError(status, "failed to finish reading DDR using PCIe"); + double svmBufferCopyout_timer = getTimeinMilliSec(); + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void 
*)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); + memcpy(out, h_outData, num_bytes); + /* + for(size_t i = 0; i < num_pts; i++){ + out[i].x = h_outData[i].x; + out[i].y = h_outData[i].y; + } + */ - cl_ulong readBuf_start = 0, readBuf_end = 0; - clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); - clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); - fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + svmBufferCopyout_timer = getTimeinMilliSec() - svmBufferCopyout_timer; + printf("SVM Buffer Copy Out Time: %lfms\n\n", svmBufferCopyout_timer); + + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); queue_cleanup(); - if (d_inData) - clReleaseMemObject(d_inData); - if (d_outData) - clReleaseMemObject(d_outData); - if (d_transpose) - clReleaseMemObject(d_transpose); + if (d_inOutData) + clReleaseMemObject(d_inOutData); if(fetch1_kernel) clReleaseKernel(fetch1_kernel); diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index 974ac98..0636a95 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -44,8 +44,15 @@ if(NOT DEFINED FPGA_BOARD_NAME) endif() endif() +if(${FPGA_BOARD_NAME} STREQUAL "pac_s10_usm") + set(BOARD_PACKAGE "-board-package=/cm/shared/opt/intel_oneapi/beta-10/intelfpgadpcpp/2021.1-beta10/board/intel_s10sx_pac_usm/") + message("-- Board Package: ${BOARD_PACKAGE}") +else() + set(BOARD_PACKAGE "") +endif() + ## Flags for different target options -set(AOC_FLAGS "-g -v -fp-relaxed -cl-single-precision-constant 
-board-package=/cm/shared/opt/intel_oneapi/beta-10/intelfpgadpcpp/2021.1-beta10/board/intel_s10sx_pac_usm/ -no-interleaving=default" CACHE STRING "AOC compiler flags") +set(AOC_FLAGS "-g -v -no-interleaving=default ${BOARD_PACKAGE}" CACHE STRING "AOC compiler flags") separate_arguments(AOC_FLAGS) set(EMU_FLAGS "-legacy-emulator -march=emulator" CACHE STRING "AOC emulation flags") separate_arguments(EMU_FLAGS) diff --git a/kernels/fft3d/CMakeLists.txt b/kernels/fft3d/CMakeLists.txt index fc28162..d95e4b1 100644 --- a/kernels/fft3d/CMakeLists.txt +++ b/kernels/fft3d/CMakeLists.txt @@ -9,7 +9,8 @@ cmake_minimum_required(VERSION 3.10) # - ${kernel_name}_syn: to generate synthesis binary ## set(CL_PATH "${fftkernelsfpga_SOURCE_DIR}/fft3d") -set(kernels fft3d_bram fft3d_bram_triv fft3d_ddr fft3d_ddr_triv) +set(kernels fft3d_bram fft3d_bram_triv fft3d_ddr fft3d_ddr_triv + fft3d_ddr_batch) include(${fftkernelsfpga_SOURCE_DIR}/cmake/genKernelTargets.cmake) diff --git a/kernels/fft3d/fft3d_bram.cl b/kernels/fft3d/fft3d_bram.cl index 3decfa0..59803c8 100755 --- a/kernels/fft3d/fft3d_bram.cl +++ b/kernels/fft3d/fft3d_bram.cl @@ -6,14 +6,14 @@ #pragma OPENCL EXTENSION cl_intel_channels : enable -channel float2 chaninfft3da[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninfft3db[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninfft3dc[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft3da[POINTS]; +channel float2 chaninfft3db[POINTS]; +channel float2 chaninfft3dc[POINTS]; -channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninTranspose3D[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranspose[POINTS]; +channel float2 chaninTranspose3D[POINTS]; -channel float2 chaninTranStore[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranStore[POINTS]; // Kernel that fetches data from global memory kernel void fetch(global volatile float2 * restrict src) { diff --git 
a/kernels/fft3d/fft3d_ddr.cl b/kernels/fft3d/fft3d_ddr.cl index 9a35570..8bcede3 100755 --- a/kernels/fft3d/fft3d_ddr.cl +++ b/kernels/fft3d/fft3d_ddr.cl @@ -6,13 +6,13 @@ #pragma OPENCL EXTENSION cl_intel_channels : enable -channel float2 chaninfft3da[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninfft3db[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninfft3dc[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft3da[POINTS]; +channel float2 chaninfft3db[POINTS]; +channel float2 chaninfft3dc[POINTS]; -channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninTranStore1[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninTranStore2[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTranspose[POINTS]; +channel float2 chaninTranStore1[POINTS]; +channel float2 chaninTranStore2[POINTS]; // Kernel that fetches data from global memory kernel void fetchBitrev1(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict src) { From 179d940148084378945b4de17007bddb2cecf0fc Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 18 Jan 2021 18:00:11 +0100 Subject: [PATCH 38/76] batched fft3d with safelen --- kernels/fft3d/fft3d_ddr_batch.cl | 628 +++++++++++++++++++++++++++++++ 1 file changed, 628 insertions(+) create mode 100755 kernels/fft3d/fft3d_ddr_batch.cl diff --git a/kernels/fft3d/fft3d_ddr_batch.cl b/kernels/fft3d/fft3d_ddr_batch.cl new file mode 100755 index 0000000..a602f77 --- /dev/null +++ b/kernels/fft3d/fft3d_ddr_batch.cl @@ -0,0 +1,628 @@ +// Author: Arjun Ramaswami + +#include "fft_config.h" +#include "fft_8.cl" +#include "../matrixTranspose/diagonal_bitrev.cl" + +#pragma OPENCL EXTENSION cl_intel_channels : enable + +channel float2 chaninfft3da[POINTS]; +channel float2 chaninfft3db[POINTS]; +channel float2 chaninfft3dc[POINTS]; + +channel float2 chaninTranspose[POINTS]; +channel float2 chaninTranspose3D[POINTS]; +channel float2 
chaninStore[POINTS]; + +#define WR_GLOBALMEM 0 +#define RD_GLOBALMEM 1 +#define BATCH 2 + +// Kernel that fetches data from global memory +kernel void fetch(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict src) { + unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bitrevA = false; + + float2 __attribute__((memory, numbanks(8))) buf[2][N]; + + // additional iterations to fill the buffers + for(unsigned step = 0; step < (N * DEPTH) + delay; step++){ + + unsigned where = (step & ((N * DEPTH) - 1)) * 8; + + float2x8 data; + if (step < (N * DEPTH)) { + data.i0 = src[where + 0]; + data.i1 = src[where + 1]; + data.i2 = src[where + 2]; + data.i3 = src[where + 3]; + data.i4 = src[where + 4]; + data.i5 = src[where + 5]; + data.i6 = src[where + 6]; + data.i7 = src[where + 7]; + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_fetch(data, + is_bitrevA ? buf[0] : buf[1], + is_bitrevA ? 
buf[1] : buf[0], + row); + + if (step >= delay) { + write_channel_intel(chaninfft3da[0], data.i0); + write_channel_intel(chaninfft3da[1], data.i1); + write_channel_intel(chaninfft3da[2], data.i2); + write_channel_intel(chaninfft3da[3], data.i3); + write_channel_intel(chaninfft3da[4], data.i4); + write_channel_intel(chaninfft3da[5], data.i5); + write_channel_intel(chaninfft3da[6], data.i6); + write_channel_intel(chaninfft3da[7], data.i7); + } + } +} + +kernel void fft3da(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3da[0]); + data.i1 = read_channel_intel(chaninfft3da[1]); + data.i2 = read_channel_intel(chaninfft3da[2]); + data.i3 = read_channel_intel(chaninfft3da[3]); + data.i4 = read_channel_intel(chaninfft3da[4]); + data.i5 = read_channel_intel(chaninfft3da[5]); + data.i6 = read_channel_intel(chaninfft3da[6]); + data.i7 = read_channel_intel(chaninfft3da[7]); + } + else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose[0], data.i0); + write_channel_intel(chaninTranspose[1], data.i1); + write_channel_intel(chaninTranspose[2], data.i2); + write_channel_intel(chaninTranspose[3], data.i3); + write_channel_intel(chaninTranspose[4], data.i4); + write_channel_intel(chaninTranspose[5], data.i5); + 
write_channel_intel(chaninTranspose[6], data.i6); + write_channel_intel(chaninTranspose[7], data.i7); + } + } + } +} + +kernel void transpose() { + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + //float2 bitrev_in[2][N], bitrev_out[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + + int initial_delay = DELAY + DELAY; // for each of the bitrev buffer + + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose[0]); + data.i1 = read_channel_intel(chaninTranspose[1]); + data.i2 = read_channel_intel(chaninTranspose[2]); + data.i3 = read_channel_intel(chaninTranspose[3]); + data.i4 = read_channel_intel(chaninTranspose[4]); + data.i5 = read_channel_intel(chaninTranspose[5]); + data.i6 = read_channel_intel(chaninTranspose[6]); + data.i7 = read_channel_intel(chaninTranspose[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, DELAY); + + data_out = readBuf( + is_bufA ? buf[1] : buf[0], + step); + + unsigned start_row = (step + DELAY) & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? 
bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + + if (step >= (DEPTH)) { + write_channel_intel(chaninfft3db[0], data_out.i0); + write_channel_intel(chaninfft3db[1], data_out.i1); + write_channel_intel(chaninfft3db[2], data_out.i2); + write_channel_intel(chaninfft3db[3], data_out.i3); + write_channel_intel(chaninfft3db[4], data_out.i4); + write_channel_intel(chaninfft3db[5], data_out.i5); + write_channel_intel(chaninfft3db[6], data_out.i6); + write_channel_intel(chaninfft3db[7], data_out.i7); + } + } +} + +kernel void fft3db(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3db[0]); + data.i1 = read_channel_intel(chaninfft3db[1]); + data.i2 = read_channel_intel(chaninfft3db[2]); + data.i3 = read_channel_intel(chaninfft3db[3]); + data.i4 = read_channel_intel(chaninfft3db[4]); + data.i5 = read_channel_intel(chaninfft3db[5]); + data.i6 = read_channel_intel(chaninfft3db[6]); + data.i7 = read_channel_intel(chaninfft3db[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose3D[0], data.i0); + write_channel_intel(chaninTranspose3D[1], data.i1); + write_channel_intel(chaninTranspose3D[2], data.i2); + write_channel_intel(chaninTranspose3D[3], data.i3); + write_channel_intel(chaninTranspose3D[4], data.i4); + 
write_channel_intel(chaninTranspose3D[5], data.i5); + write_channel_intel(chaninTranspose3D[6], data.i6); + write_channel_intel(chaninTranspose3D[7], data.i7); + } + } + } +} + +kernel void transpose3D( + __global __attribute__((buffer_location(DDR_BUFFER_LOCATION))) float2 * restrict src, + __global __attribute__((buffer_location(DDR_BUFFER_LOCATION))) float2 * restrict dest, + const int mode) { + + const int initial_delay = (1 << (LOGN - LOGPOINTS)); // N / 8 for the bitrev buffers + bool is_bufA = false, is_bitrevA = false; + bool is_bufB = false, is_bitrevB = false; + + float2 buf[2][DEPTH][POINTS]; + float2 buf_batch[2][DEPTH][POINTS]; + //float2 bufB[2][DEPTH][POINTS]; + //float2 buf_wr[2][DEPTH][POINTS]; + //float2 buf_rd[2][DEPTH][POINTS]; + + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + float2 bitrev_in[2][N]; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + + float2 bitrev_in_batch[2][N]; + float2 __attribute__((memory, numbanks(8))) bitrev_out_batch[2][N]; + + // additional iterations to fill the buffers + //#pragma ivdep array(bitrev_out) + #pragma ivdep safelen(16) + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + float2x8 data_wr, data_wr_out; + switch(mode){ + case WR_GLOBALMEM: { + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose3D[0]); + data.i1 = read_channel_intel(chaninTranspose3D[1]); + data.i2 = read_channel_intel(chaninTranspose3D[2]); + data.i3 = read_channel_intel(chaninTranspose3D[3]); + data.i4 = read_channel_intel(chaninTranspose3D[4]); + data.i5 = read_channel_intel(chaninTranspose3D[5]); + data.i6 = read_channel_intel(chaninTranspose3D[6]); + data.i7 = read_channel_intel(chaninTranspose3D[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( 
step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned index = (step - DEPTH) * 8; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; + } + + //printf("End Wr Transpose3d %d\n", step); + break; + } // condition for writing to global memory + case RD_GLOBALMEM: { + + unsigned step_rd = step + initial_delay; + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned start_index = step_rd + initial_delay; + unsigned zdim = (step_rd >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (step_rd >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // increment by 8 until N / 8 + unsigned xdim = (step_rd * 8) & (N - 1); + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (step_rd >> (LOGN + LOGN + LOGN - LOGPOINTS)); + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + //float2x8 data, data_out; + if (step < (N * DEPTH)) { + data.i0 = src[index + 0]; + data.i1 = src[index + 1]; + data.i2 = src[index + 2]; + data.i3 = src[index + 3]; + data.i4 = src[index + 4]; + data.i5 = src[index + 5]; + data.i6 = src[index + 6]; + data.i7 = src[index + 7]; + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + is_bufA = (( step_rd & (DEPTH - 1)) == 0) ? 
!is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step_rd & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step_rd, 0); + + data_out = readBuf_fetch( + is_bufA ? buf[1] : buf[0], + step_rd, 0); + + unsigned start_row = step_rd & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + if (step_rd >= (DEPTH + initial_delay)) { + + write_channel_intel(chaninfft3dc[0], data_out.i0); + write_channel_intel(chaninfft3dc[1], data_out.i1); + write_channel_intel(chaninfft3dc[2], data_out.i2); + write_channel_intel(chaninfft3dc[3], data_out.i3); + write_channel_intel(chaninfft3dc[4], data_out.i4); + write_channel_intel(chaninfft3dc[5], data_out.i5); + write_channel_intel(chaninfft3dc[6], data_out.i6); + write_channel_intel(chaninfft3dc[7], data_out.i7); + } + + //printf("End Rd Transpose3d %d\n", step); + break; + } // condition for reading from global memory + case BATCH:{ + + /* + * Writing to global mem + */ + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose3D[0]); + data.i1 = read_channel_intel(chaninTranspose3D[1]); + data.i2 = read_channel_intel(chaninTranspose3D[2]); + data.i3 = read_channel_intel(chaninTranspose3D[3]); + data.i4 = read_channel_intel(chaninTranspose3D[4]); + data.i5 = read_channel_intel(chaninTranspose3D[5]); + data.i6 = read_channel_intel(chaninTranspose3D[6]); + data.i7 = read_channel_intel(chaninTranspose3D[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? 
!is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in_batch[0] : bitrev_in_batch[1], + is_bitrevA ? bitrev_in_batch[1] : bitrev_in_batch[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned index = (step - DEPTH) * 8; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; + } + + /* + * Reading from global mem + */ + unsigned step_rd = step + initial_delay; + unsigned start_index = step_rd + initial_delay; + unsigned zdim = (step_rd >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (step_rd >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // increment by 8 until N / 8 + unsigned xdim = (step_rd * 8) & (N - 1); + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (step_rd >> (LOGN + LOGN + LOGN - LOGPOINTS)); + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + if (step_rd < (N * DEPTH)) { + data_wr.i0 = src[index + 0]; + data_wr.i1 = src[index + 1]; + data_wr.i2 = src[index + 2]; + data_wr.i3 = src[index + 3]; + data_wr.i4 = src[index + 4]; + data_wr.i5 = src[index + 5]; + data_wr.i6 = src[index + 6]; + data_wr.i7 = src[index + 7]; + } else { + data_wr.i0 = data_wr.i1 = data_wr.i2 = data_wr.i3 = + data_wr.i4 = data_wr.i5 = data_wr.i6 = data_wr.i7 = 0; + } + + is_bufB = (( step_rd & (DEPTH - 1)) == 0) ? !is_bufB: is_bufB; + + // Swap bitrev buffers every N/8 iterations + is_bitrevB = ( (step_rd & ((N / 8) - 1)) == 0) ? !is_bitrevB: is_bitrevB; + + writeBuf(data_wr, + is_bufB ? buf_batch[0] : buf_batch[1], + step_rd, 0); + + data_wr_out = readBuf_fetch( + is_bufB ? 
buf_batch[1] : buf_batch[0], + step_rd, 0); + + unsigned start_row = step_rd & (DEPTH -1); + data_wr_out = bitreverse_out( + is_bitrevB ? bitrev_out_batch[0] : bitrev_out_batch[1], + is_bitrevB ? bitrev_out_batch[1] : bitrev_out_batch[0], + data_wr_out, start_row); + + if (step_rd >= (DEPTH + initial_delay)) { + + write_channel_intel(chaninfft3dc[0], data_wr_out.i0); + write_channel_intel(chaninfft3dc[1], data_wr_out.i1); + write_channel_intel(chaninfft3dc[2], data_wr_out.i2); + write_channel_intel(chaninfft3dc[3], data_wr_out.i3); + write_channel_intel(chaninfft3dc[4], data_wr_out.i4); + write_channel_intel(chaninfft3dc[5], data_wr_out.i5); + write_channel_intel(chaninfft3dc[6], data_wr_out.i6); + write_channel_intel(chaninfft3dc[7], data_wr_out.i7); + } + break; + } // condition for batch mode + } + } + //printf("End Transpose3d \n"); +} + +kernel void fft3dc(int inverse) { + + /* The FFT engine requires a sliding window for data reordering; data stored + * in this array is carried across loop iterations and shifted by 1 element + * every iteration; all loop dependencies derived from the uses of this + * array are simple transfers between adjacent array elements + */ + + float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; + + #pragma loop_coalesce + for(unsigned j = 0; j < N; j++){ + + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft3dc[0]); + data.i1 = read_channel_intel(chaninfft3dc[1]); + data.i2 = read_channel_intel(chaninfft3dc[2]); + data.i3 = read_channel_intel(chaninfft3dc[3]); + data.i4 = read_channel_intel(chaninfft3dc[4]); + data.i5 = read_channel_intel(chaninfft3dc[5]); + data.i6 = read_channel_intel(chaninfft3dc[6]); + data.i7 = read_channel_intel(chaninfft3dc[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), 
fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninStore[0], data.i0); + write_channel_intel(chaninStore[1], data.i1); + write_channel_intel(chaninStore[2], data.i2); + write_channel_intel(chaninStore[3], data.i3); + write_channel_intel(chaninStore[4], data.i4); + write_channel_intel(chaninStore[5], data.i5); + write_channel_intel(chaninStore[6], data.i6); + write_channel_intel(chaninStore[7], data.i7); + } + } + } +} + +kernel void store(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict dest) { + + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 bitrev_in[2][N]; + //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninStore[0]); + data.i1 = read_channel_intel(chaninStore[1]); + data.i2 = read_channel_intel(chaninStore[2]); + data.i3 = read_channel_intel(chaninStore[3]); + data.i4 = read_channel_intel(chaninStore[4]); + data.i5 = read_channel_intel(chaninStore[5]); + data.i6 = read_channel_intel(chaninStore[6]); + data.i7 = read_channel_intel(chaninStore[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? 
bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned start_index = (step - DEPTH); + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned zdim = (start_index >> (LOGN - LOGPOINTS)) & (N - 1); + + // increment y by 1 every N*N/8 points until N + unsigned ydim = (start_index >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); + + // incremenet by 8 until N / 8 + unsigned xdim = (start_index * 8) & ( N - 1); + //unsigned index = (step - DEPTH) * 8; + + // increment by N*N*N + unsigned cube = LOGN + LOGN + LOGN - LOGPOINTS; + + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (start_index >> cube); + //unsigned batch_index = 0; + + unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; + } + } +} From 85b3e4f35b6d19ac77100863d9726ea7a665eaff Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 20 Jan 2021 13:30:09 +0100 Subject: [PATCH 39/76] fixed host code for svm, svm batch --- api/src/fft3d.c | 298 +++++++++++++++++++++++------------------------- 1 file changed, 144 insertions(+), 154 deletions(-) diff --git a/api/src/fft3d.c b/api/src/fft3d.c index b9124e8..f27c620 100644 --- a/api/src/fft3d.c +++ b/api/src/fft3d.c @@ -15,6 +15,9 @@ #include "opencl_utils.h" #include "misc.h" +#define WR_GLOBALMEM 0 +#define RD_GLOBALMEM 1 +#define BATCH 2 /** * \brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA @@ -423,7 +426,10 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; cl_int status = 0; int 
num_pts = N * N * N; - + + // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 2 - BATCH + int mode = WR_GLOBALMEM; + // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ return fft_time; @@ -433,23 +439,21 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, int inverse_int = (int)inv; // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); - checkError(status, "Failed to create fetch1 kernel"); + cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); checkError(status, "Failed to create fft3da kernel"); cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); checkError(status, "Failed to create transpose kernel"); cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); - checkError(status, "Failed to create store1 kernel"); + cl_kernel transpose3D_kernel = clCreateKernel(program, "transpose3D", &status); + checkError(status, "Failed to create transpose3D kernel"); - cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); - checkError(status, "Failed to create fetch2 kernel"); cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); - checkError(status, "Failed to create store2 kernel"); + cl_kernel store_kernel = clCreateKernel(program, "store", &status); + checkError(status, "Failed to create store kernel"); // Setup Queues to the kernels queue_setup(); @@ -502,9 +506,12 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, svmBufferCopyIn_timer = getTimeinMilliSec() - 
svmBufferCopyIn_timer; printf("\nSVM Buffer Copy In Time: %lfms\n", svmBufferCopyIn_timer); + /* + * kernel arguments + */ // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData); - checkError(status, "Failed to set fetch1 kernel arg"); + status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData); + checkError(status, "Failed to set fetch kernel arg"); status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set ffta kernel arg"); @@ -512,32 +519,43 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, checkError(status, "Failed to set fftb kernel arg"); // kernel stores to DDR memory - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); + checkError(status, "Failed to set transpose3D kernel arg"); // kernel fetches from DDR memory - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); - checkError(status, "Failed to set fetch2 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void*)&d_inOutData); + checkError(status, "Failed to set transpose3D kernel arg"); + + mode = WR_GLOBALMEM; + + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftc kernel arg"); // kernel stores using SVM based PCIe to host - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void*)h_outData); + status = clSetKernelArgSVMPointer(store_kernel, 0, (void*)h_outData); checkError(status, "Failed to set store2 kernel arg"); cl_event startExec_event, endExec_event; fft_time.exec_t = getTimeinMilliSec(); - status = 
clEnqueueTask(queue7, store2_kernel, 0, NULL, &endExec_event); + status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch transpose kernel"); status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); + mode = RD_GLOBALMEM; + + status = clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); @@ -549,7 +567,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, &startExec_event); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); status = clFinish(queue7); @@ -605,10 +623,8 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, if (d_inOutData) clReleaseMemObject(d_inOutData); - if(fetch1_kernel) - clReleaseKernel(fetch1_kernel); - if(fetch2_kernel) - clReleaseKernel(fetch2_kernel); + if(fetch_kernel) + clReleaseKernel(fetch_kernel); if(ffta_kernel) clReleaseKernel(ffta_kernel); @@ -620,10 +636,11 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, if(transpose_kernel) clReleaseKernel(transpose_kernel); - if(store1_kernel) - clReleaseKernel(store1_kernel); - 
if(store2_kernel) - clReleaseKernel(store2_kernel); + if(transpose3D_kernel) + clReleaseKernel(transpose3D_kernel); + + if(store_kernel) + clReleaseKernel(store_kernel); fft_time.valid = 1; return fft_time; @@ -1120,6 +1137,9 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool cl_int status = 0; int num_pts = N * N * N; + // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 2 - BATCH + int mode = WR_GLOBALMEM; + // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 0)){ return fft_time; @@ -1129,46 +1149,39 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool return fft_time; } -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR \n", inv ? " inverse":""); -#endif - // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); - checkError(status, "Failed to create fetch1 kernel"); + cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); checkError(status, "Failed to create fft3da kernel"); cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); checkError(status, "Failed to create transpose kernel"); cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); - checkError(status, "Failed to create store1 kernel"); + cl_kernel transpose3D_kernel = clCreateKernel(program, "transpose3D", &status); + checkError(status, "Failed to create transpose3D kernel"); - cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); - checkError(status, "Failed to create fetch2 kernel"); cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); 
checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); - checkError(status, "Failed to create store2 kernel"); + cl_kernel store_kernel = clCreateKernel(program, "store", &status); + checkError(status, "Failed to create store kernel"); // Setup Queues to the kernels queue_setup(); // Device memory buffers: double buffers - cl_mem d_outData_0; - d_outData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_inOutData_0; + d_inOutData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - cl_mem d_outData_1; - d_outData_1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_inOutData_1; + d_inOutData_1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); // allocate and initialize SVM buffers - float2 *h_inData[how_many], *h_outData[how_many]; for(size_t i = 0; i < how_many; i++){ h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); @@ -1204,89 +1217,68 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool * kernel arguments */ // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData[0]); + status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData[0]); checkError(status, "Failed to set fetch1 kernel arg"); status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set ffta kernel arg"); + // transpose() has no arguments status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed 
to set fftb kernel arg"); // kernel stores to DDR memory - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData_1); + checkError(status, "Failed to set transpose3D kernel arg"); + + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void*)&d_inOutData_0); + checkError(status, "Failed to set transpose3D kernel arg"); + + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg"); /* * First batch write phase */ fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); for(size_t i = 1; i < how_many; i++){ - /* - * Read phase of previous iteration - */ - // kernel fetches from DDR memory - // kernel stores using SVM based PCIe to host - 
if( (i % 2) == 1){ - // if odd number of batches - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set fetch2 kernel arg"); - - // Start fetch2 phase with same queue as store1 - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - } - else{ - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); - checkError(status, "Failed to set fetch2 kernel arg"); + status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData[i]); + checkError(status, "Failed to set fetch kernel arg"); - // Start fetch2 phase with same queue as store1 - status = clEnqueueTask(queue8, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - } - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[i-1]); - checkError(status, "Failed to set store2 kernel arg"); + status = clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), ((i % 2) == 1) ? (void*)&d_inOutData_0 : (void*)&d_inOutData_1); + checkError(status, "Failed to set transpose3D kernel arg 0"); - status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); + status = clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), ((i % 2) == 1) ? 
(void*)&d_inOutData_1 : (void*)&d_inOutData_0); + checkError(status, "Failed to set transpose3D kernel arg 1"); - status = clEnqueueTask(queue7, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + mode = BATCH; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); - /* - * write phase of current iteration - */ - // change write phase host and ddr ptrs - status = clSetKernelArgSVMPointer(fetch1_kernel, 0, (void *)h_inData[i]); - checkError(status, "Failed to set fetch1 kernel arg"); - if(i % 2 == 1){ - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); - checkError(status, "Failed to set store1 kernel arg"); - } - else{ - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set store1 kernel arg"); - } + status = clSetKernelArgSVMPointer(store_kernel, 0, (void *)h_outData[i-1]); + checkError(status, "Failed to set store kernel arg"); - // Start write phase of current iteration - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + // Enqueue Tasks + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose3D kernel"); + + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); @@ -1298,56 +1290,55 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - if(i % 2 == 1){ - status = clEnqueueTask(queue8, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - } - else{ - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose 
kernel"); - } + status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue7, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch store kernel"); + + status = clFinish(queue1); + checkError(status, "Failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "Failed to finish queue2"); + status = clFinish(queue3); + checkError(status, "Failed to finish queue3"); + status = clFinish(queue4); + checkError(status, "Failed to finish queue4"); + status = clFinish(queue5); + checkError(status, "Failed to finish queue5"); + status = clFinish(queue6); + checkError(status, "Failed to finish queue6"); + status = clFinish(queue7); + checkError(status, "Failed to finish queue7"); } - if(how_many % 2 == 1){ - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_0); - checkError(status, "Failed to set fetch2 kernel arg"); + status = clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), ((how_many % 2) == 0) ? (void*)&d_inOutData_0 : (void*)&d_inOutData_1); + checkError(status, "Failed to set transpose3D kernel arg 0"); - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - } - else{ - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_outData_1); - checkError(status, "Failed to set fetch2 kernel arg"); - status = clEnqueueTask(queue8, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - } - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - status = clSetKernelArgSVMPointer(store2_kernel, 0, (void *)h_outData[how_many-1]); - checkError(status, "Failed to set store2 kernel arg"); + status = clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), ((how_many % 2) == 0) ? 
(void*)&d_inOutData_1 : (void*)&d_inOutData_0); + checkError(status, "Failed to set transpose3D kernel arg 1"); + + mode = RD_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose3D kernel"); status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue7, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clFinish(queue1); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue2); - checkError(status, "Failed to finish queue2"); - status = clFinish(queue3); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue4); - checkError(status, "Failed to finish queue2"); + status = clSetKernelArgSVMPointer(store_kernel, 0, (void *)h_outData[how_many]); + checkError(status, "Failed to set store kernel arg"); + status = clEnqueueTask(queue7, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch store kernel"); + status = clFinish(queue5); - checkError(status, "Failed to finish queue1"); + checkError(status, "Failed to finish queue5"); status = clFinish(queue6); - checkError(status, "Failed to finish queue2"); + checkError(status, "Failed to finish queue6"); status = clFinish(queue7); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue8); - checkError(status, "Failed to finish queue2"); + checkError(status, "Failed to finish queue7"); fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; @@ -1374,15 +1365,13 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool queue_cleanup(); - if (d_outData_0) - clReleaseMemObject(d_outData_0); - if (d_outData_1) - clReleaseMemObject(d_outData_1); + if (d_inOutData_0) + 
clReleaseMemObject(d_inOutData_0); + if (d_inOutData_1) + clReleaseMemObject(d_inOutData_1); - if(fetch1_kernel) - clReleaseKernel(fetch1_kernel); - if(fetch2_kernel) - clReleaseKernel(fetch2_kernel); + if(fetch_kernel) + clReleaseKernel(fetch_kernel); if(ffta_kernel) clReleaseKernel(ffta_kernel); @@ -1394,10 +1383,11 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool if(transpose_kernel) clReleaseKernel(transpose_kernel); - if(store1_kernel) - clReleaseKernel(store1_kernel); - if(store2_kernel) - clReleaseKernel(store2_kernel); + if(transpose3D_kernel) + clReleaseKernel(transpose3D_kernel); + + if(store_kernel) + clReleaseKernel(store_kernel); fft_time.valid = 1; return fft_time; From 1e416ebefabb1667ffea0db23a3480ceaada5b62 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 4 Feb 2021 12:43:06 +0100 Subject: [PATCH 40/76] working svm batch --- api/include/fftfpga/fftfpga.h | 10 +- api/src/fft3d.c | 148 +++++++++++++------------- api/src/fftfpga.c | 6 +- api/src/svm.c | 127 +--------------------- examples/common/helper.c | 17 +-- examples/common/helper.h | 6 +- examples/fft3d_ddr_svm.c | 13 ++- examples/fft3d_ddr_svm_batch.c | 19 +++- kernels/fft3d/fft3d_ddr_batch.cl | 175 ++++--------------------------- 9 files changed, 151 insertions(+), 370 deletions(-) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 81dbb2f..5752d53 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -30,13 +30,15 @@ typedef struct { * Record time in milliseconds of different FPGA runtime stages */ typedef struct fpga_timing { - double pcie_read_t; /**< Time to read from DDR to host using PCIe bus */ + double pcie_read_t; /**< Time to read from DDR to host using PCIe bus */ double pcie_write_t; /**< Time to write from DDR to host using PCIe bus */ - double exec_t; /**< Kernel execution time from CPU wall clock time*/ + double exec_t; /**< Kernel execution time from CPU wall clock time */ 
double hw_pcie_read_t; /**< HW Counter Time to read from DDR to host using PCIe bus */ double hw_pcie_write_t; /**< HW Counter Time to write from DDR to host using PCIe bus */ - double hw_exec_t; /**< Kernel execution time from HW counters*/ - int valid; /**< Represents 1 signifying valid execution */ + double hw_exec_t; /**< Kernel execution time from HW counters */ + double svm_copyin_t; /**< Time to copy in data to SVM */ + double svm_copyout_t; /**< Time to copy data out of SVM */ + bool valid; /**< Represents true signifying valid execution */ } fpga_t; /** diff --git a/api/src/fft3d.c b/api/src/fft3d.c index f27c620..38aa71d 100644 --- a/api/src/fft3d.c +++ b/api/src/fft3d.c @@ -19,6 +19,9 @@ #define RD_GLOBALMEM 1 #define BATCH 2 +#define NON_BATCH_MODE 0 +#define BATCH_MODE 1 + /** * \brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA * \param N : integer pointer addressing the size of FFT3d @@ -423,9 +426,9 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, bool interleaving) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, false}; cl_int status = 0; - int num_pts = N * N * N; + unsigned num_pts = N * N * N; // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 2 - BATCH int mode = WR_GLOBALMEM; @@ -447,7 +450,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, checkError(status, "Failed to create transpose kernel"); cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); checkError(status, "Failed to create fft3db kernel"); - cl_kernel transpose3D_kernel = clCreateKernel(program, "transpose3D", &status); + cl_kernel transpose3D_kernel= clCreateKernel(program, "transpose3D", &status); checkError(status, "Failed to create transpose3D 
kernel"); cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); @@ -474,38 +477,29 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - double svmBufferCopyIn_timer = getTimeinMilliSec(); + size_t num_bytes = num_pts * sizeof(float2); + double svm_copyin_t = 0.0; + svm_copyin_t = getTimeinMilliSec(); + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); // copy data into h_inData - size_t num_bytes = num_pts * sizeof(float2); memcpy(h_inData, inp, num_bytes); - /* - for(size_t i = 0; i < num_pts; i++){ - h_inData[i].x = inp[i].x; - h_inData[i].y = inp[i].y; - } - */ status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); checkError(status, "Failed to unmap input data"); + fft_time.svm_copyin_t += getTimeinMilliSec() - svm_copyin_t; status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); // copy data into h_inData - for(size_t i = 0; i < num_pts; i++){ - h_outData[i].x = 0.0; - h_outData[i].y = 0.0; - } + memset(&h_outData[0], 0, num_bytes); status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); checkError(status, "Failed to unmap input data"); - svmBufferCopyIn_timer = getTimeinMilliSec() - svmBufferCopyIn_timer; - printf("\nSVM Buffer Copy In Time: %lfms\n", svmBufferCopyIn_timer); - /* * kernel arguments */ @@ -536,7 +530,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, // kernel stores using SVM based PCIe to host status = clSetKernelArgSVMPointer(store_kernel, 0, (void*)h_outData); - checkError(status, "Failed to set store2 kernel arg"); + checkError(status, 
"Failed to set store kernel arg"); cl_event startExec_event, endExec_event; @@ -594,24 +588,17 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); - double svmBufferCopyout_timer = getTimeinMilliSec(); + double svm_copyout_t = 0.0; + svm_copyout_t = getTimeinMilliSec(); status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map out data"); memcpy(out, h_outData, num_bytes); - /* - for(size_t i = 0; i < num_pts; i++){ - out[i].x = h_outData[i].x; - out[i].y = h_outData[i].y; - } - */ status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); checkError(status, "Failed to unmap out data"); - - svmBufferCopyout_timer = getTimeinMilliSec() - svmBufferCopyout_timer; - printf("SVM Buffer Copy Out Time: %lfms\n\n", svmBufferCopyout_timer); + fft_time.svm_copyout_t += getTimeinMilliSec() - svm_copyout_t; if (h_inData) clSVMFree(context, h_inData); @@ -642,7 +629,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, if(store_kernel) clReleaseKernel(store_kernel); - fft_time.valid = 1; + fft_time.valid = true; return fft_time; } @@ -1133,19 +1120,13 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool inv, int how_many) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, false}; cl_int status = 0; - int num_pts = N * N * N; - // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 2 - BATCH - int mode = WR_GLOBALMEM; + int mode_transpose = WR_GLOBALMEM; // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 0)){ - return fft_time; - } - - 
if(!svm_enabled){ + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 0) || !svm_enabled){ return fft_time; } @@ -1173,41 +1154,40 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool queue_setup(); // Device memory buffers: double buffers - cl_mem d_inOutData_0; - d_inOutData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + unsigned num_pts = N * N * N; + + cl_mem d_inOutData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - cl_mem d_inOutData_1; - d_inOutData_1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_inOutData_1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); // allocate and initialize SVM buffers + double svm_copyin_t = 0.0; float2 *h_inData[how_many], *h_outData[how_many]; for(size_t i = 0; i < how_many; i++){ h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); h_outData[i] = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + size_t num_bytes = num_pts * sizeof(float2); + + svm_copyin_t = getTimeinMilliSec(); status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData[i], sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); // copy data into h_inData - size_t stride = i * num_pts; - for(size_t j = 0; j < num_pts; j++){ - h_inData[i][j].x = inp[stride + j].x; - h_inData[i][j].y = inp[stride + j].y; - } + memcpy(&h_inData[i][0], &inp[i*num_pts], num_bytes); status = clEnqueueSVMUnmap(queue1, (void *)h_inData[i], 0, NULL, NULL); checkError(status, "Failed to unmap input data"); + 
fft_time.svm_copyin_t += getTimeinMilliSec() - svm_copyin_t; + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); - // copy data into h_inData - for(size_t j = 0; j < num_pts; j++){ - h_outData[i][j].x = 0.0; - h_outData[i][j].y = 0.0; - } + // set h_outData to 0 + memset(&h_outData[i][0], 0, num_bytes); status = clEnqueueSVMUnmap(queue1, (void *)h_outData[i], 0, NULL, NULL); checkError(status, "Failed to unmap input data"); @@ -1233,10 +1213,14 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void*)&d_inOutData_0); checkError(status, "Failed to set transpose3D kernel arg"); - mode = WR_GLOBALMEM; - status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + mode_transpose = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode_transpose); checkError(status, "Failed to set transpose3D kernel arg"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + + cl_event startExec_event, endExec_event; /* * First batch write phase */ @@ -1253,9 +1237,20 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); + status = clFinish(queue1); + checkError(status, "Failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "Failed to finish queue2"); + status = clFinish(queue3); + checkError(status, "Failed to finish queue3"); + status = clFinish(queue4); + checkError(status, "Failed to finish 
queue4"); + status = clFinish(queue5); + checkError(status, "Failed to finish queue5"); + for(size_t i = 1; i < how_many; i++){ status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData[i]); @@ -1267,8 +1262,8 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool status = clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), ((i % 2) == 1) ? (void*)&d_inOutData_1 : (void*)&d_inOutData_0); checkError(status, "Failed to set transpose3D kernel arg 1"); - mode = BATCH; - status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + mode_transpose = BATCH; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode_transpose); checkError(status, "Failed to set transpose3D kernel arg 2"); status = clSetKernelArgSVMPointer(store_kernel, 0, (void *)h_outData[i-1]); @@ -1312,14 +1307,14 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool checkError(status, "Failed to finish queue7"); } - status = clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), ((how_many % 2) == 0) ? (void*)&d_inOutData_0 : (void*)&d_inOutData_1); + status = clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), ((how_many % 2) == 0) ? (void*)&d_inOutData_1 : (void*)&d_inOutData_0); checkError(status, "Failed to set transpose3D kernel arg 0"); - status = clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), ((how_many % 2) == 0) ? (void*)&d_inOutData_1 : (void*)&d_inOutData_0); + status = clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), ((how_many % 2) == 0) ? 
(void*)&d_inOutData_0 : (void*)&d_inOutData_1); checkError(status, "Failed to set transpose3D kernel arg 1"); - mode = RD_GLOBALMEM; - status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + mode_transpose = RD_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode_transpose); checkError(status, "Failed to set transpose3D kernel arg 2"); status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); @@ -1328,9 +1323,9 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clSetKernelArgSVMPointer(store_kernel, 0, (void *)h_outData[how_many]); + status = clSetKernelArgSVMPointer(store_kernel, 0, (void *)h_outData[how_many - 1]); checkError(status, "Failed to set store kernel arg"); - status = clEnqueueTask(queue7, store_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch store kernel"); status = clFinish(queue5); @@ -1342,20 +1337,29 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + + double svm_copyout_t = 0.0; for(size_t i = 0; i < how_many; i++){ - status = clEnqueueSVMMap(queue2, CL_TRUE, CL_MAP_READ, + // copy data into h_outData + size_t num_bytes = num_pts * sizeof(float2); + svm_copyout_t = getTimeinMilliSec(); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); 
checkError(status, "Failed to map out data"); - size_t stride = i * num_pts; - for(size_t j = 0; j < num_pts; j++){ - out[stride + j].x = h_outData[i][j].x; - out[stride + j].y = h_outData[i][j].y; - } + memcpy(&out[i*num_pts], &h_outData[i][0], num_bytes); - status = clEnqueueSVMUnmap(queue2, (void *)h_outData[i], 0, NULL, NULL); + status = clEnqueueSVMUnmap(queue1, (void *)h_outData[i], 0, NULL, NULL); checkError(status, "Failed to unmap out data"); + fft_time.svm_copyout_t += getTimeinMilliSec() - svm_copyout_t; } for(size_t i = 0; i < how_many; i++){ @@ -1389,6 +1393,6 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool if(store_kernel) clReleaseKernel(store_kernel); - fft_time.valid = 1; + fft_time.valid = true; return fft_time; } diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index c7fe599..ce3101e 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -20,9 +20,11 @@ cl_device_id *devices; cl_device_id device = NULL; cl_context context = NULL; cl_program program = NULL; + cl_command_queue queue1 = NULL, queue2 = NULL, queue3 = NULL; cl_command_queue queue4 = NULL, queue5 = NULL, queue6 = NULL; cl_command_queue queue7 = NULL, queue8 = NULL; + //static int svm_handle; bool svm_enabled = false; @@ -91,19 +93,21 @@ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ cl_uint num_devices; devices = getDevices(platform, CL_DEVICE_TYPE_ALL, &num_devices); // Unable to find device for the OpenCL platform + printf("Number of devices: %u\n", num_devices); if(devices == NULL){ return -3; } // use the first device. 
device = devices[0]; + printf(" -- Choosing first device\n"); if(use_svm){ if(!check_valid_svm_device(device)){ return -5; } else{ - printf("Supports SVM \n"); + printf(" -- Supports SVM \n"); svm_enabled = true; } } diff --git a/api/src/svm.c b/api/src/svm.c index 3184aad..957bb44 100644 --- a/api/src/svm.c +++ b/api/src/svm.c @@ -2,20 +2,9 @@ #include #include #include "CL/opencl.h" -//#include "aocl_mmd.h" #include "svm.h" #include "opencl_utils.h" -/* -int replace(){ - const char* board_name; - aocl_mmd_offline_info_t info_id; - aocl_mmd_get_offline_info(info_id, ); - - return aocl_mmd_open(board_name); -} -*/ - /** * @brief Check if device support svm * @param device @@ -36,6 +25,7 @@ bool check_valid_svm_device(cl_device_id device){ checkError(status, "Failed to get device info"); if (caps && CL_DEVICE_SVM_COARSE_GRAIN_BUFFER){ + printf(" -- Found Coarse Grained Buffer SVM capability\n"); return true; } else if(caps && CL_DEVICE_SVM_FINE_GRAIN_BUFFER){ @@ -59,117 +49,4 @@ bool check_valid_svm_device(cl_device_id device){ return false; } return false; -} - - // Transfer Data to Global Memory or allocate SVM buffer - /* - size_t buf_size = sizeof(double2) * N * iter; - (double2 *)aocl_mmd_shared_mem_alloc(, buf_size, ) - h_inData = (double2 *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(double2) * N * iter, 0); - - h_outData = (float2 *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(double2) * N * iter, 0); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, - (void *)h_inData, sizeof(double2) * N * iter, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // Copy data from input file to SVM allocated memory. 
- for (int i = 0; i < N * iter; i++) { - h_inData[i].x = inp[i].x; - h_inData[i].y = inp[i].y; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - // Can't pass bool to device, so convert it to int - int inverse_int = inv; - - // Create Kernels - names must match the kernel name in the original CL file - kernel1 = clCreateKernel(program, "fetch", &status); - checkError(status, "Failed to create fetch kernel"); - - kernel2 = clCreateKernel(program, "fft1d", &status); - checkError(status, "Failed to create fft1d kernel"); - // Set the kernel arguments - - status = clSetKernelArgSVMPointer(kernel1, 0, (void *)h_inData); - checkError(status, "Failed to set kernel1 arg 0"); - - status = clSetKernelArgSVMPointer(kernel2, 0, (void *)h_outData); - checkError(status, "Failed to set kernel1 arg 0"); - status = clSetKernelArgSVMPointer(kernel1, 0, (void *)h_inData); - checkError(status, "Failed to set kernel1 arg 0"); - - status = clSetKernelArgSVMPointer(kernel2, 0, (void *)h_outData); - - checkError(status, "Failed to set kernel arg 0"); - status = clSetKernelArg(kernel2, 1, sizeof(cl_int), (void*)&iter); - checkError(status, "Failed to set kernel arg 1"); - status = clSetKernelArg(kernel2, 2, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set kernel arg 2"); - - printf(inverse_int ? 
"\tInverse FFT" : "\tFFT"); - printf(" kernel initialization is complete.\n"); - - // Get the itertamp to evaluate performance - fft_time.exec_t = getTimeinMilliSec(); - - // Launch the kernel - we launch a single work item hence enqueue a task - status = clEnqueueTask(queue1, kernel1, 0, NULL, NULL); - checkError(status, "Failed to launch kernel"); - - size_t ls = N/8; - size_t gs = iter * ls; - status = clEnqueueNDRangeKernel(queue1, kernel2, 1, NULL, &gs, &ls, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - // Wait for command queue to complete pending events - status = clFinish(queue1); - checkError(status, "Failed to finish"); - status = clFinish(queue2); - checkError(status, "Failed to finish queue1"); - - // Record execution time - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, - (void *)h_outData, sizeof(float2) * N * iter, 0, NULL, NULL); - checkError(status, "Failed to map out data"); - - // Copy data from input file to SVM allocated memory. 
- for (int i = 0; i < N * iter; i++) { - out[i].x = h_outData[i].x; - out[i].y = h_outData[i].y; - } - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - // Cleanup - if(kernel1) - clReleaseKernel(kernel1); - if(kernel2) - clReleaseKernel(kernel2); - queue_cleanup(); - if (h_inData) - clSVMFree(context, h_inData); - if (h_outData) - clSVMFree(context, h_outData); - } - */ - - - // return if SVM enabled but no device supported - /* - if(use_svm){ - // TODO: emulation and svm - if (check_valid_svm_device(device)){ - svm_enabled = 1; - } - else{ - fpga_final(); - return 1; - } - return 1; - } - */ +} \ No newline at end of file diff --git a/examples/common/helper.c b/examples/common/helper.c index f88e2d7..cdc0a1e 100755 --- a/examples/common/helper.c +++ b/examples/common/helper.c @@ -17,13 +17,13 @@ * \param N : number of points in the array * \return true if successful */ -bool fftf_create_data(float2 *inp, int N){ +bool fftf_create_data(float2 *inp, unsigned num_pts){ - if(inp == NULL || N <= 0){ + if(inp == NULL || num_pts <= 0){ return false; } - for(int i = 0; i < N; i++){ + for(size_t i = 0; i < num_pts; i++){ inp[i].x = (float)((float)rand() / (float)RAND_MAX); inp[i].y = (float)((float)rand() / (float)RAND_MAX); } @@ -33,17 +33,17 @@ bool fftf_create_data(float2 *inp, int N){ /** * \brief create random double precision complex floating point values - * \param inp : pointer to double2 data of size N - * \param N : number of points in the array + * \param inp : pointer to double2 data of size inp_sz + * \param inp_sz : number of points in the array * \return true if successful */ -bool fft_create_data(double2 *inp, int N){ +bool fft_create_data(double2 *inp, unsigned num_pts){ - if(inp == NULL || N <= 0 || N > 1024){ + if(inp == NULL || num_pts <= 0){ return false; } - for(int i = 0; i < N; i++){ + for(size_t i = 0; i < num_pts; i++){ inp[i].x = (double)((double)rand() / 
(double)RAND_MAX); inp[i].y = (double)((double)rand() / (double)RAND_MAX); } @@ -135,6 +135,7 @@ void display_measures(double total_api_time, double pcie_rd, double pcie_wr, dou printf("Hw Total = %.4lfms\n", hw_pcie_write + hw_execution + hw_pcie_read); printf("Throughput = %.4lfGFLOPS/s | %.4lf GB/s\n", gflops, gBytes_per_sec); printf("API runtime = %.4lfms\n", avg_api_time); + } /** diff --git a/examples/common/helper.h b/examples/common/helper.h index 36c99bb..206eedc 100755 --- a/examples/common/helper.h +++ b/examples/common/helper.h @@ -6,13 +6,13 @@ #include #include "fftfpga/fftfpga.h" -bool fftf_create_data(float2 *inp, int N); +bool fftf_create_data(float2 *inp, unsigned N); -bool fft_create_data(double2 *inp, int N); +bool fft_create_data(double2 *inp, unsigned N); void print_config(int N, int dim, int iter, bool inv, bool sp, int batch, bool use_bram, bool interleaving); -void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, double avg_shw_pcie_rd, double avg_hw_pcie_wr, double avg_hw_exec, int N, int dim, int iter, int batch, bool inv, bool sp); +void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, double avg_hw_pcie_rd, double avg_hw_pcie_wr, double avg_hw_exec, int N, int dim, int iter, int batch, bool inv, bool sp); double getTimeinMilliseconds(); #endif // HELPER_H diff --git a/examples/fft3d_ddr_svm.c b/examples/fft3d_ddr_svm.c index cf449d8..97b4a34 100755 --- a/examples/fft3d_ddr_svm.c +++ b/examples/fft3d_ddr_svm.c @@ -31,6 +31,7 @@ int main(int argc, const char **argv) { double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; double temp_timer = 0.0, total_api_time = 0.0; + double avg_svm_copyin = 0.0, avg_svm_copyout = 0.0; struct argparse_option options[] = { OPT_HELP(), @@ -99,7 +100,7 @@ int main(int argc, const char **argv) { return EXIT_FAILURE; } #endif - if(timing.valid == 0){ + if(!timing.valid){ 
fprintf(stderr, "Invalid execution, timing found to be 0"); free(inp); free(out); @@ -112,6 +113,8 @@ int main(int argc, const char **argv) { avg_hw_rd += timing.hw_pcie_read_t; avg_hw_wr += timing.hw_pcie_write_t; avg_hw_exec += timing.hw_exec_t; + avg_svm_copyin += timing.svm_copyin_t; + avg_svm_copyout += timing.svm_copyout_t; printf("Iter: %lu\n", i); printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); @@ -122,6 +125,10 @@ int main(int argc, const char **argv) { printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); + + printf("SVM Memcpy: \n"); + printf("\tHW Copy In: %lfms\n", timing.svm_copyin_t); + printf("\tHW Copy Out: %lfms\n\n", timing.svm_copyout_t); } // iter // destroy FFT input and output @@ -134,5 +141,9 @@ int main(int argc, const char **argv) { // display performance measures display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); + printf("\n"); + printf("SVM Copy In = %.4lfms\n", avg_svm_copyin / iter); + printf("SVM Copy Out = %.4lfms\n", avg_svm_copyout / iter); + return EXIT_SUCCESS; } diff --git a/examples/fft3d_ddr_svm_batch.c b/examples/fft3d_ddr_svm_batch.c index c335453..519cc4d 100755 --- a/examples/fft3d_ddr_svm_batch.c +++ b/examples/fft3d_ddr_svm_batch.c @@ -26,9 +26,10 @@ int main(int argc, const char **argv) { char *path = "fft3d_emulate.aocx"; const char *platform; - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.0, false}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; + double avg_svm_copyin = 0.0, avg_svm_copyout = 0.0; struct argparse_option options[] = { OPT_HELP(), @@ -68,14 +69,17 @@ int main(int argc, const char **argv) { } double total_api_time = 0.0; + // create and destroy data every iteration size_t inp_sz = sizeof(float2) * 
N * N * N * batch; float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); + unsigned num_pts = N*N*N * batch; + for(size_t i = 0; i < iter; i++){ - status = fftf_create_data(inp, N * N * N * batch); + status = fftf_create_data(inp, num_pts); if(!status){ fprintf(stderr, "Error in Data Creation \n"); free(inp); @@ -96,7 +100,7 @@ int main(int argc, const char **argv) { return EXIT_FAILURE; } #endif - if(timing.valid == 0){ + if(!timing.valid){ fprintf(stderr, "Invalid execution, timing found to be 0"); free(inp); free(out); @@ -109,6 +113,8 @@ int main(int argc, const char **argv) { avg_hw_rd += timing.hw_pcie_read_t; avg_hw_wr += timing.hw_pcie_write_t; avg_hw_exec += timing.hw_exec_t; + avg_svm_copyin += timing.svm_copyin_t; + avg_svm_copyout += timing.svm_copyout_t; printf("Iter: %lu\n", i); printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); @@ -120,6 +126,9 @@ int main(int argc, const char **argv) { printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); + printf("SVM Memcpy: \n"); + printf("\tHW Copy In: %lfms\n", timing.svm_copyin_t); + printf("\tHW Copy Out: %lfms\n\n", timing.svm_copyout_t); } // iter // destroy FFT input and output @@ -132,5 +141,9 @@ int main(int argc, const char **argv) { // display performance measures display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); + printf("\n"); + printf("SVM Copy In = %.4lfms\n", avg_svm_copyin / iter); + printf("SVM Copy Out = %.4lfms\n", avg_svm_copyout / iter); + return EXIT_SUCCESS; } diff --git a/kernels/fft3d/fft3d_ddr_batch.cl b/kernels/fft3d/fft3d_ddr_batch.cl index a602f77..9e7fd89 100755 --- a/kernels/fft3d/fft3d_ddr_batch.cl +++ b/kernels/fft3d/fft3d_ddr_batch.cl @@ -238,28 +238,19 @@ kernel void transpose3D( bool is_bufA = false, is_bitrevA = false; bool is_bufB = false, is_bitrevB = false; - float2 buf[2][DEPTH][POINTS]; 
- float2 buf_batch[2][DEPTH][POINTS]; - //float2 bufB[2][DEPTH][POINTS]; - //float2 buf_wr[2][DEPTH][POINTS]; - //float2 buf_rd[2][DEPTH][POINTS]; + float2 buf_wr[2][DEPTH][POINTS]; + float2 buf_rd[2][DEPTH][POINTS]; //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; float2 bitrev_in[2][N]; float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; - float2 bitrev_in_batch[2][N]; - float2 __attribute__((memory, numbanks(8))) bitrev_out_batch[2][N]; - // additional iterations to fill the buffers - //#pragma ivdep array(bitrev_out) - #pragma ivdep safelen(16) for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ float2x8 data, data_out; float2x8 data_wr, data_wr_out; - switch(mode){ - case WR_GLOBALMEM: { + if(mode == WR_GLOBALMEM || mode == BATCH){ if (step < ((N * DEPTH) - initial_delay)) { data.i0 = read_channel_intel(chaninTranspose3D[0]); data.i1 = read_channel_intel(chaninTranspose3D[1]); @@ -273,6 +264,7 @@ kernel void transpose3D( data.i0 = data.i1 = data.i2 = data.i3 = data.i4 = data.i5 = data.i6 = data.i7 = 0; } + // Swap buffers every N*N/8 iterations // starting from the additional delay of N/8 iterations is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; @@ -287,11 +279,11 @@ kernel void transpose3D( row); writeBuf(data, - is_bufA ? buf[0] : buf[1], + is_bufA ? buf_wr[0] : buf_wr[1], step, 0); data_out = readBuf_store( - is_bufA ? buf[1] : buf[0], + is_bufA ? 
buf_wr[1] : buf_wr[0], step); if (step >= (DEPTH)) { @@ -306,11 +298,8 @@ kernel void transpose3D( dest[index + 6] = data_out.i6; dest[index + 7] = data_out.i7; } - - //printf("End Wr Transpose3d %d\n", step); - break; } // condition for writing to global memory - case RD_GLOBALMEM: { + if(mode == RD_GLOBALMEM || mode == BATCH){ unsigned step_rd = step + initial_delay; // increment z by 1 every N/8 steps until (N*N/ 8) @@ -326,136 +315,18 @@ kernel void transpose3D( // increment by 1 every N*N*N / 8 steps unsigned batch_index = (step_rd >> (LOGN + LOGN + LOGN - LOGPOINTS)); - unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + unsigned index_wr = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; //float2x8 data, data_out; if (step < (N * DEPTH)) { - data.i0 = src[index + 0]; - data.i1 = src[index + 1]; - data.i2 = src[index + 2]; - data.i3 = src[index + 3]; - data.i4 = src[index + 4]; - data.i5 = src[index + 5]; - data.i6 = src[index + 6]; - data.i7 = src[index + 7]; - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - is_bufA = (( step_rd & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step_rd & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - writeBuf(data, - is_bufA ? buf[0] : buf[1], - step_rd, 0); - - data_out = readBuf_fetch( - is_bufA ? buf[1] : buf[0], - step_rd, 0); - - unsigned start_row = step_rd & (DEPTH -1); - data_out = bitreverse_out( - is_bitrevA ? bitrev_out[0] : bitrev_out[1], - is_bitrevA ? 
bitrev_out[1] : bitrev_out[0], - data_out, start_row); - - if (step_rd >= (DEPTH + initial_delay)) { - - write_channel_intel(chaninfft3dc[0], data_out.i0); - write_channel_intel(chaninfft3dc[1], data_out.i1); - write_channel_intel(chaninfft3dc[2], data_out.i2); - write_channel_intel(chaninfft3dc[3], data_out.i3); - write_channel_intel(chaninfft3dc[4], data_out.i4); - write_channel_intel(chaninfft3dc[5], data_out.i5); - write_channel_intel(chaninfft3dc[6], data_out.i6); - write_channel_intel(chaninfft3dc[7], data_out.i7); - } - - //printf("End Rd Transpose3d %d\n", step); - break; - } // condition for reading from global memory - case BATCH:{ - - /* - * Writing to global mem - */ - if (step < ((N * DEPTH) - initial_delay)) { - data.i0 = read_channel_intel(chaninTranspose3D[0]); - data.i1 = read_channel_intel(chaninTranspose3D[1]); - data.i2 = read_channel_intel(chaninTranspose3D[2]); - data.i3 = read_channel_intel(chaninTranspose3D[3]); - data.i4 = read_channel_intel(chaninTranspose3D[4]); - data.i5 = read_channel_intel(chaninTranspose3D[5]); - data.i6 = read_channel_intel(chaninTranspose3D[6]); - data.i7 = read_channel_intel(chaninTranspose3D[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - // Swap buffers every N*N/8 iterations - // starting from the additional delay of N/8 iterations - is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_in(data, - is_bitrevA ? bitrev_in_batch[0] : bitrev_in_batch[1], - is_bitrevA ? bitrev_in_batch[1] : bitrev_in_batch[0], - row); - - writeBuf(data, - is_bufA ? buf[0] : buf[1], - step, 0); - - data_out = readBuf_store( - is_bufA ? 
buf[1] : buf[0], - step); - - if (step >= (DEPTH)) { - unsigned index = (step - DEPTH) * 8; - - dest[index + 0] = data_out.i0; - dest[index + 1] = data_out.i1; - dest[index + 2] = data_out.i2; - dest[index + 3] = data_out.i3; - dest[index + 4] = data_out.i4; - dest[index + 5] = data_out.i5; - dest[index + 6] = data_out.i6; - dest[index + 7] = data_out.i7; - } - - /* - * Reading from global mem - */ - unsigned step_rd = step + initial_delay; - unsigned start_index = step_rd + initial_delay; - unsigned zdim = (step_rd >> (LOGN - LOGPOINTS)) & (N - 1); - - // increment y by 1 every N*N/8 points until N - unsigned ydim = (step_rd >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); - - // increment by 8 until N / 8 - unsigned xdim = (step_rd * 8) & (N - 1); - - // increment by 1 every N*N*N / 8 steps - unsigned batch_index = (step_rd >> (LOGN + LOGN + LOGN - LOGPOINTS)); - - unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; - - if (step_rd < (N * DEPTH)) { - data_wr.i0 = src[index + 0]; - data_wr.i1 = src[index + 1]; - data_wr.i2 = src[index + 2]; - data_wr.i3 = src[index + 3]; - data_wr.i4 = src[index + 4]; - data_wr.i5 = src[index + 5]; - data_wr.i6 = src[index + 6]; - data_wr.i7 = src[index + 7]; + data_wr.i0 = src[index_wr + 0]; + data_wr.i1 = src[index_wr + 1]; + data_wr.i2 = src[index_wr + 2]; + data_wr.i3 = src[index_wr + 3]; + data_wr.i4 = src[index_wr + 4]; + data_wr.i5 = src[index_wr + 5]; + data_wr.i6 = src[index_wr + 6]; + data_wr.i7 = src[index_wr + 7]; } else { data_wr.i0 = data_wr.i1 = data_wr.i2 = data_wr.i3 = data_wr.i4 = data_wr.i5 = data_wr.i6 = data_wr.i7 = 0; @@ -467,17 +338,17 @@ kernel void transpose3D( is_bitrevB = ( (step_rd & ((N / 8) - 1)) == 0) ? !is_bitrevB: is_bitrevB; writeBuf(data_wr, - is_bufB ? buf_batch[0] : buf_batch[1], + is_bufB ? buf_rd[0] : buf_rd[1], step_rd, 0); data_wr_out = readBuf_fetch( - is_bufB ? buf_batch[1] : buf_batch[0], + is_bufB ? 
buf_rd[1] : buf_rd[0], step_rd, 0); unsigned start_row = step_rd & (DEPTH -1); data_wr_out = bitreverse_out( - is_bitrevB ? bitrev_out_batch[0] : bitrev_out_batch[1], - is_bitrevB ? bitrev_out_batch[1] : bitrev_out_batch[0], + is_bitrevB ? bitrev_out[0] : bitrev_out[1], + is_bitrevB ? bitrev_out[1] : bitrev_out[0], data_wr_out, start_row); if (step_rd >= (DEPTH + initial_delay)) { @@ -491,11 +362,9 @@ kernel void transpose3D( write_channel_intel(chaninfft3dc[6], data_wr_out.i6); write_channel_intel(chaninfft3dc[7], data_wr_out.i7); } - break; - } // condition for batch mode - } + + } // condition for reading from global memory } - //printf("End Transpose3d \n"); } kernel void fft3dc(int inverse) { From 673e12292d91f0882c011708c804ef07a8329bc4 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 4 Feb 2021 17:28:08 +0100 Subject: [PATCH 41/76] no verify param --- examples/fft3d_ddr_svm_batch.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/examples/fft3d_ddr_svm_batch.c b/examples/fft3d_ddr_svm_batch.c index 519cc4d..639292d 100755 --- a/examples/fft3d_ddr_svm_batch.c +++ b/examples/fft3d_ddr_svm_batch.c @@ -31,17 +31,19 @@ int main(int argc, const char **argv) { double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; double avg_svm_copyin = 0.0, avg_svm_copyout = 0.0; + bool noverify = false; + struct argparse_option options[] = { OPT_HELP(), OPT_GROUP("Basic Options"), OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), OPT_INTEGER('i',"iter", &iter, "Iterations"), OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), OPT_INTEGER('c',"batch", &batch, "Batch"), OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), OPT_STRING('p', "path", &path, "Path to bitstream"), OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), + OPT_BOOLEAN('y', "noverify", &noverify, "Don't verify results"), OPT_END(), }; @@ -93,11 +95,14 @@ int main(int argc, const 
char **argv) { total_api_time += getTimeinMilliseconds() - temp_timer; #ifdef USE_FFTW - if(!verify_fftwf(out, inp, N, 3, inv, batch)){ - fprintf(stderr, "3d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; + if(noverify == false){ + printf("Verifying results for iteration %lu\n", i); + if(!verify_fftwf(out, inp, N, 3, inv, batch)){ + fprintf(stderr, "3d FFT Verification Failed \n"); + free(inp); + free(out); + return EXIT_FAILURE; + } } #endif if(!timing.valid){ From 74f4a063c4658fa855e1a557a90e81bea728fd49 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 4 Feb 2021 17:28:46 +0100 Subject: [PATCH 42/76] free forgotten --- examples/common/verify_fftw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/common/verify_fftw.c b/examples/common/verify_fftw.c index c0e2653..9b8823e 100644 --- a/examples/common/verify_fftw.c +++ b/examples/common/verify_fftw.c @@ -80,6 +80,7 @@ bool verify_fftwf(float2 *fpgaout, float2 *verify, int N, int dim, bool inverse, // Free FFTW data fftwf_free(fftw_data); + free(n); // destroy plan fftwf_destroy_plan(plan); From 0fea2a08c8593006d1138433e308fe8def8f32d3 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Wed, 29 Sep 2021 19:41:19 +0200 Subject: [PATCH 43/76] porting example to cpp, using cxxopts, cmake files to folder --- CMakeLists.txt | 51 ++---- cmake/FindFFTW.cmake | 339 ++++++++++++++++++++++++++++++++++++++++ cmake/extDep.cmake | 32 ++++ examples/CMakeLists.txt | 34 ++-- examples/fft.cpp | 13 ++ examples/helper.cpp | 86 ++++++++++ examples/helper.hpp | 27 ++++ extern/argparse | 1 - extern/findFFTW | 1 - extern/gtest | 1 - extern/hlslib | 1 - 11 files changed, 525 insertions(+), 61 deletions(-) create mode 100644 cmake/FindFFTW.cmake create mode 100644 cmake/extDep.cmake create mode 100644 examples/fft.cpp create mode 100644 examples/helper.cpp create mode 100644 examples/helper.hpp delete mode 160000 extern/argparse delete mode 160000 extern/findFFTW delete mode 160000 
extern/gtest delete mode 160000 extern/hlslib diff --git a/CMakeLists.txt b/CMakeLists.txt index dfbcc32..31f57fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,37 +1,24 @@ # Author: Arjun Ramaswami cmake_minimum_required (VERSION 3.10.3) - -project(fft) +project(fft VERSION 2.0 + DESCRIPTION "OpenCL based FFT library for Intel FPGAs" + LANGUAGES C CXX) set(CMAKE_C_STANDARD 11) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) -# Setup submodules that are required -find_package(Git QUIET) -if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git") -# Update submodules as needed - option(GIT_SUBMODULE "Check submodules during build" ON) - if(GIT_SUBMODULE) - message(STATUS "Submodule update") - execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - RESULT_VARIABLE GIT_SUBMOD_RESULT) - if(NOT GIT_SUBMOD_RESULT EQUAL "0") - message(FATAL_ERROR "git submodule update --init failed with ${GIT_SUBMOD_RESULT}, please checkout submodules") - endif() - endif() -endif() - -# Include hlslib in CMake module path -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/extern/hlslib/cmake) +# build external dependencies +message("-- Building external dependencies") +include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/extDep.cmake) -# Find hlslib Intel OpenCL kernels +## find Intel FPGA SDK for OpenCL find_package(IntelFPGAOpenCL REQUIRED) +message("-- IntelFPGAOpenCL found") -# Find FFTW -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/extern/findFFTW) +## find FFTW +list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") find_package(FFTW REQUIRED) +message("-- FFTW found") # Link argparse as static library add_subdirectory(${CMAKE_SOURCE_DIR}/extern/argparse) @@ -41,15 +28,7 @@ add_subdirectory(api) add_subdirectory(kernels) add_subdirectory(examples) -# GTest Setup -if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) - option(PACKAGE_TESTS "Build the tests" ON) -endif() - 
-if(PACKAGE_TESTS) - enable_testing() - include(GoogleTest) - add_subdirectory(tests) -endif() - -# TODO: shift to external project instead of submodules as suggested by GTest \ No newline at end of file +# build tests +message("-- Building tests") +add_subdirectory(tests) +enable_testing() \ No newline at end of file diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake new file mode 100644 index 0000000..8b7ef3c --- /dev/null +++ b/cmake/FindFFTW.cmake @@ -0,0 +1,339 @@ +# - Find the FFTW library +# +# Original version of this file: +# Copyright (c) 2015, Wenzel Jakob +# https://github.com/wjakob/layerlab/blob/master/cmake/FindFFTW.cmake, commit 4d58bfdc28891b4f9373dfe46239dda5a0b561c6 +# Modifications: +# Copyright (c) 2017, Patrick Bos +# +# Usage: +# find_package(FFTW [REQUIRED] [QUIET] [COMPONENTS component1 ... componentX] ) +# +# It sets the following variables: +# FFTW_FOUND ... true if fftw is found on the system +# FFTW_[component]_LIB_FOUND ... true if the component is found on the system (see components below) +# FFTW_LIBRARIES ... full paths to all found fftw libraries +# FFTW_[component]_LIB ... full path to one of the components (see below) +# FFTW_INCLUDE_DIRS ... fftw include directory paths +# +# The following variables will be checked by the function +# FFTW_USE_STATIC_LIBS ... if true, only static libraries are found, otherwise both static and shared. +# FFTW_ROOT ... 
if set, the libraries are exclusively searched +# under this path +# +# This package supports the following components: +# FLOAT_LIB +# DOUBLE_LIB +# LONGDOUBLE_LIB +# FLOAT_THREADS_LIB +# DOUBLE_THREADS_LIB +# LONGDOUBLE_THREADS_LIB +# FLOAT_OPENMP_LIB +# DOUBLE_OPENMP_LIB +# LONGDOUBLE_OPENMP_LIB +# + +# TODO (maybe): extend with ExternalProject download + build option +# TODO: put on conda-forge + + +if( NOT FFTW_ROOT AND DEFINED ENV{FFTWDIR} ) + set( FFTW_ROOT $ENV{FFTWDIR} ) +endif() + +# Check if we can use PkgConfig +find_package(PkgConfig) + +#Determine from PKG +if( PKG_CONFIG_FOUND AND NOT FFTW_ROOT ) + pkg_check_modules( PKG_FFTW QUIET "fftw3" ) +endif() + +#Check whether to search static or dynamic libs +set( CMAKE_FIND_LIBRARY_SUFFIXES_SAV ${CMAKE_FIND_LIBRARY_SUFFIXES} ) + +if( ${FFTW_USE_STATIC_LIBS} ) + set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ) +else() + set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_SAV} ) +endif() + +if( FFTW_ROOT ) + # find libs + + find_library( + FFTW_DOUBLE_LIB + NAMES "fftw3" libfftw3-3 + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTW_DOUBLE_THREADS_LIB + NAMES "fftw3_threads" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTW_DOUBLE_OPENMP_LIB + NAMES "fftw3_omp" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTW_FLOAT_LIB + NAMES "fftw3f" libfftw3f-3 + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTW_FLOAT_THREADS_LIB + NAMES "fftw3f_threads" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTW_FLOAT_OPENMP_LIB + NAMES "fftw3f_omp" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTW_LONGDOUBLE_LIB + NAMES "fftw3l" libfftw3l-3 + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + 
find_library( + FFTW_LONGDOUBLE_THREADS_LIB + NAMES "fftw3l_threads" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + find_library( + FFTW_LONGDOUBLE_OPENMP_LIB + NAMES "fftw3l_omp" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "lib" "lib64" + NO_DEFAULT_PATH + ) + + #find includes + find_path(FFTW_INCLUDE_DIRS + NAMES "fftw3.h" + PATHS ${FFTW_ROOT} + PATH_SUFFIXES "include" + NO_DEFAULT_PATH + ) + +else() + + find_library( + FFTW_DOUBLE_LIB + NAMES "fftw3" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library( + FFTW_DOUBLE_THREADS_LIB + NAMES "fftw3_threads" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library( + FFTW_DOUBLE_OPENMP_LIB + NAMES "fftw3_omp" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library( + FFTW_FLOAT_LIB + NAMES "fftw3f" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library( + FFTW_FLOAT_THREADS_LIB + NAMES "fftw3f_threads" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library( + FFTW_FLOAT_OPENMP_LIB + NAMES "fftw3f_omp" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library( + FFTW_LONGDOUBLE_LIB + NAMES "fftw3l" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library( + FFTW_LONGDOUBLE_THREADS_LIB + NAMES "fftw3l_threads" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_library(FFTW_LONGDOUBLE_OPENMP_LIB + NAMES "fftw3l_omp" + PATHS ${PKG_FFTW_LIBRARY_DIRS} ${LIB_INSTALL_DIR} + ) + + find_path(FFTW_INCLUDE_DIRS + NAMES "fftw3.h" + PATHS ${PKG_FFTW_INCLUDE_DIRS} ${INCLUDE_INSTALL_DIR} + ) + +endif( FFTW_ROOT ) + +#--------------------------------------- components + +if (FFTW_DOUBLE_LIB) + set(FFTW_DOUBLE_LIB_FOUND TRUE) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTW_DOUBLE_LIB}) + add_library(FFTW::Double INTERFACE IMPORTED) + set_target_properties(FFTW::Double + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FFTW_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${FFTW_DOUBLE_LIB}" + ) 
+else() + set(FFTW_DOUBLE_LIB_FOUND FALSE) +endif() + +if (FFTW_FLOAT_LIB) + set(FFTW_FLOAT_LIB_FOUND TRUE) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTW_FLOAT_LIB}) + add_library(FFTW::Float INTERFACE IMPORTED) + set_target_properties(FFTW::Float + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FFTW_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${FFTW_FLOAT_LIB}" + ) +else() + set(FFTW_FLOAT_LIB_FOUND FALSE) +endif() + +if (FFTW_LONGDOUBLE_LIB) + set(FFTW_LONGDOUBLE_LIB_FOUND TRUE) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTW_LONGDOUBLE_LIB}) + add_library(FFTW::LongDouble INTERFACE IMPORTED) + set_target_properties(FFTW::LongDouble + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FFTW_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${FFTW_LONGDOUBLE_LIB}" + ) +else() + set(FFTW_LONGDOUBLE_LIB_FOUND FALSE) +endif() + +if (FFTW_DOUBLE_THREADS_LIB) + set(FFTW_DOUBLE_THREADS_LIB_FOUND TRUE) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTW_DOUBLE_THREADS_LIB}) + add_library(FFTW::DoubleThreads INTERFACE IMPORTED) + set_target_properties(FFTW::DoubleThreads + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FFTW_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${FFTW_DOUBLETHREADS_LIB}" + ) +else() + set(FFTW_DOUBLE_THREADS_LIB_FOUND FALSE) +endif() + +if (FFTW_FLOAT_THREADS_LIB) + set(FFTW_FLOAT_THREADS_LIB_FOUND TRUE) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTW_FLOAT_THREADS_LIB}) + add_library(FFTW::FloatThreads INTERFACE IMPORTED) + set_target_properties(FFTW::FloatThreads + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FFTW_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${FFTW_FLOAT_THREADS_LIB}" + ) +else() + set(FFTW_FLOAT_THREADS_LIB_FOUND FALSE) +endif() + +if (FFTW_LONGDOUBLE_THREADS_LIB) + set(FFTW_LONGDOUBLE_THREADS_LIB_FOUND TRUE) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTW_LONGDOUBLE_THREADS_LIB}) + add_library(FFTW::LongDoubleThreads INTERFACE IMPORTED) + set_target_properties(FFTW::LongDoubleThreads + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FFTW_INCLUDE_DIRS}" + 
INTERFACE_LINK_LIBRARIES "${FFTW_LONGDOUBLE_THREADS_LIB}" + ) +else() + set(FFTW_LONGDOUBLE_THREADS_LIB_FOUND FALSE) +endif() + +if (FFTW_DOUBLE_OPENMP_LIB) + set(FFTW_DOUBLE_OPENMP_LIB_FOUND TRUE) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTW_DOUBLE_OPENMP_LIB}) + add_library(FFTW::DoubleOpenMP INTERFACE IMPORTED) + set_target_properties(FFTW::DoubleOpenMP + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FFTW_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${FFTW_DOUBLE_OPENMP_LIB}" + ) +else() + set(FFTW_DOUBLE_OPENMP_LIB_FOUND FALSE) +endif() + +if (FFTW_FLOAT_OPENMP_LIB) + set(FFTW_FLOAT_OPENMP_LIB_FOUND TRUE) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTW_FLOAT_OPENMP_LIB}) + add_library(FFTW::FloatOpenMP INTERFACE IMPORTED) + set_target_properties(FFTW::FloatOpenMP + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FFTW_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${FFTW_FLOAT_OPENMP_LIB}" + ) +else() + set(FFTW_FLOAT_OPENMP_LIB_FOUND FALSE) +endif() + +if (FFTW_LONGDOUBLE_OPENMP_LIB) + set(FFTW_LONGDOUBLE_OPENMP_LIB_FOUND TRUE) + set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${FFTW_LONGDOUBLE_OPENMP_LIB}) + add_library(FFTW::LongDoubleOpenMP INTERFACE IMPORTED) + set_target_properties(FFTW::LongDoubleOpenMP + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${FFTW_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${FFTW_LONGDOUBLE_OPENMP_LIB}" + ) +else() + set(FFTW_LONGDOUBLE_OPENMP_LIB_FOUND FALSE) +endif() + +#--------------------------------------- end components + +set( CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_SAV} ) + +include(FindPackageHandleStandardArgs) + +find_package_handle_standard_args(FFTW + REQUIRED_VARS FFTW_INCLUDE_DIRS + HANDLE_COMPONENTS + ) + +mark_as_advanced( + FFTW_INCLUDE_DIRS + FFTW_LIBRARIES + FFTW_FLOAT_LIB + FFTW_DOUBLE_LIB + FFTW_LONGDOUBLE_LIB + FFTW_FLOAT_THREADS_LIB + FFTW_DOUBLE_THREADS_LIB + FFTW_LONGDOUBLE_THREADS_LIB + FFTW_FLOAT_OPENMP_LIB + FFTW_DOUBLE_OPENMP_LIB + FFTW_LONGDOUBLE_OPENMP_LIB + ) diff --git a/cmake/extDep.cmake 
b/cmake/extDep.cmake new file mode 100644 index 0000000..c533f17 --- /dev/null +++ b/cmake/extDep.cmake @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.10.3) + +message("-- Fetching HLSLib") +# External Dependencies +## HLSLib - finds Intel OpenCL for FPGA installation +include(FetchContent) +FetchContent_Declare( + hlslib + GIT_REPOSITORY https://github.com/definelicht/hlslib.git + GIT_TAG 66462501a8779694f8b64b2c32cc59552cd59429 +) +FetchContent_MakeAvailable(hlslib) + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${hlslib_SOURCE_DIR}/cmake) + +message("-- Fetching Cxxopts") +## CxxOpts - for command line argument parsing +FetchContent_Declare( + cxxopts + GIT_REPOSITORY https://github.com/jarro2783/cxxopts + GIT_TAG v2.2.1 +) +FetchContent_MakeAvailable(cxxopts) + +message("-- Fetching gTest") +## googleTest +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.10.0 +) +FetchContent_MakeAvailable(googletest) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ee8b145..94cc802 100755 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,34 +1,26 @@ -# Arjun Ramaswami cmake_minimum_required(VERSION 3.10) -project(examplesfftfpga VERSION 0.1 - DESCRIPTION "Example Code that uses libfftfpga" +project(examplesfftfpga VERSION 2.0 + DESCRIPTION "Sample Code that uses libfftfpga" LANGUAGES C CXX) -set(examples fft3d_ddr fft3d_bram fft3d_ddr_svm fft3d_ddr_batch fft3d_ddr_svm_batch fft2d fft1d fft1d_svm fft1d_batch) +set(examples fft) # create a target for each of the example foreach(example ${examples}) + add_executable(${example} + ${example}.cpp helper.cpp) - add_executable(${example} ${example}.c - common/helper.c - common/verify_fftw.c) - - target_compile_options(${example} - PRIVATE -Wall -Werror) + target_compile_options(${example} PRIVATE -Wall -Werror) target_include_directories(${example} - PRIVATE ${IntelFPGAOpenCL_INCLUDE_DIRS} - "${CMAKE_SOURCE_DIR}/extern/argparse" - 
common) + PRIVATE ${PROJECT_SOURCE_DIR} + ${IntelFPGAOpenCL_INCLUDE_DIRS} + ${CMAKE_BINARY_DIR}/include + ${FFTW_INCLUDE_DIRS}) - target_link_libraries(${example} - PRIVATE ${IntelFPGAOpenCL_LIBRARIES} fftfpga argparse m) - -if(FFTW_FOUND) target_compile_definitions(${example} PRIVATE USE_FFTW) - target_link_libraries(${example} PUBLIC fftw3 fftw3f) -else() - message(WARNING, "FFTW library not found. Cannot perform verification") -endif() + target_link_libraries(${example} + PRIVATE cxxopts fftfpga fftw3 fftw3f + ${IntelFPGAOpenCL_LIBRARIES}) endforeach() \ No newline at end of file diff --git a/examples/fft.cpp b/examples/fft.cpp new file mode 100644 index 0000000..f2c05a2 --- /dev/null +++ b/examples/fft.cpp @@ -0,0 +1,13 @@ +#include +#include "helper.hpp" + +using namespace std; + +int main(int argc, char* argv[]){ + + CONFIG config; + parse_args(argc, argv, config); + + print_config(config); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/examples/helper.cpp b/examples/helper.cpp new file mode 100644 index 0000000..2ec4d57 --- /dev/null +++ b/examples/helper.cpp @@ -0,0 +1,86 @@ +#include +#include "cxxopts.hpp" +#include "helper.hpp" + +using namespace std; + +/** + * \brief create random single precision complex floating point values + * \param inp : pointer to float2 data of size N + * \param N : number of points in the array + * \return true if successful + */ +void create_data(float2 *inp, const unsigned num){ + + if(inp == NULL || num < 4){ throw "Bad args in create data function";} + + for(unsigned i = 0; i < num; i++){ + inp[i].x = (float)((float)rand() / (float)RAND_MAX); + inp[i].y = (float)((float)rand() / (float)RAND_MAX); + } +} + +/** + * \brief using cxxopts to parse cmd line args to the executable + * \param argc, argv + * \param config: custom structure of variables storing config values + */ +void parse_args(int argc, char* argv[], CONFIG &config){ + + try{ + cxxopts::Options options("./fft<..>", "Offloading FFT on FPGA"); 
+ options.add_options() + ("n, num", "Number of sample points in a dimension", cxxopts::value()->default_value("64")) + ("d, dim", "Number of dimensions", cxxopts::value()->default_value("3")) + ("b, back", "Toggle Backward FFT", cxxopts::value()->default_value("false") ) + ("i, iter", "Number of iterations", cxxopts::value()->default_value("1")) + ("p, path", "Path to FPGA bitstream", cxxopts::value()) + ("y, noverify", "Toggle to not verify with FFTW", cxxopts::value()->default_value("false") ) + ("c, batch", "Number of batches of FFT calculations in FPGA", cxxopts::value()->default_value("1") ) + ("t, burst", "Toggle to use burst interleaved global memory accesses in FPGA", cxxopts::value()->default_value("false") ) + ("m, use_bram", "Toggle to use BRAM instead of DDR for 3D Transpose ", cxxopts::value()->default_value("false") ) + ("h,help", "Print usage"); + auto opt = options.parse(argc, argv); + + // print help + if (opt.count("help")){ + cout << options.help() << endl; + exit(0); + } + + config.num = opt["num"].as(); + config.dim = opt["dim"].as(); + config.inv = opt["back"].as(); + config.iter = opt["iter"].as(); + config.noverify = opt["noverify"].as(); + config.batch = opt["batch"].as(); + config.burst = opt["burst"].as(); + config.use_bram = opt["use_bram"].as(); + + if(opt.count("path")){ + config.path = opt["path"].as(); + } + else{ + throw "please input path to bitstream. Exiting! \n"; + } + } + catch(const char *msg){ + cerr << "Error parsing options: " << msg << endl; + exit(1); + } +} + +void print_config(CONFIG config){ + printf("\n------------------------------------------\n"); + printf("FFT CONFIGURATION: \n"); + printf("--------------------------------------------\n"); + printf("Type : Complex to Complex\n"); + printf("Points : %d%s \n", config.num, config.dim == 1 ? "" : config.dim == 2 ? "^2" : "^3"); + printf("Direction : %s \n", config.inv ? 
"Backward":"Forward"); + printf("Placement : In Place \n"); + printf("Batch : %d \n", config.batch); + printf("Iterations : %d \n", config.iter); + printf("Transpose3D : %s \n", config.use_bram ? "BRAM":"DDR"); + printf("Burst Interleaving : %s \n", config.burst ? "Yes":"No"); + printf("--------------------------------------------\n\n"); +} \ No newline at end of file diff --git a/examples/helper.hpp b/examples/helper.hpp new file mode 100644 index 0000000..e759dc8 --- /dev/null +++ b/examples/helper.hpp @@ -0,0 +1,27 @@ +#ifndef HELPER_HPP +#define HELPER_HPP + +#include +#include "fftfpga/fftfpga.h" + +struct CONFIG{ + unsigned num; + unsigned dim; + bool inv; + unsigned iter; + std::string path; + bool noverify; + unsigned batch; + bool burst; + bool use_bram; +}; + +void parse_args(int argc, char* argv[], CONFIG &config); + +void print_config(CONFIG config); + +double getTimeinMilliSec(); + +void create_data(float2 *inp, const unsigned num); + +#endif // HELPER_HPP \ No newline at end of file diff --git a/extern/argparse b/extern/argparse deleted file mode 160000 index 4ed6099..0000000 --- a/extern/argparse +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4ed6099cb33245b06343518b9f3c45ac56e8283c diff --git a/extern/findFFTW b/extern/findFFTW deleted file mode 160000 index 05b6961..0000000 --- a/extern/findFFTW +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 05b696123f379245483f7b7a1ff4abeb6f490667 diff --git a/extern/gtest b/extern/gtest deleted file mode 160000 index 482ac6e..0000000 --- a/extern/gtest +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 482ac6ee63429af2aa9c44f4e6427873fb68fb1f diff --git a/extern/hlslib b/extern/hlslib deleted file mode 160000 index 2ecff07..0000000 --- a/extern/hlslib +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2ecff07c77c089c58c0d8dd52641b740dadf5d1a From 2345a64357f3ce4c58d7f3db9f8a67f1f4de67a1 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 1 Oct 2021 14:40:17 +0200 Subject: [PATCH 44/76] removed argparse, refactored 
timing --- CMakeLists.txt | 3 - api/include/fftfpga/fftfpga.h | 28 ++- api/src/fft1d.c | 310 ++++----------------------- api/src/fft2d.c | 54 ++--- api/src/fft3d.c | 86 +++----- examples/fft.cpp | 77 ++++++- examples/fft1d_batch.c | 130 ----------- examples/helper.cpp | 137 ++++++++++++ examples/helper.hpp | 5 + kernels/CMakeLists.txt | 7 + kernels/cmake/genKernelTargets.cmake | 2 +- tests/CMakeLists.txt | 6 +- 12 files changed, 325 insertions(+), 520 deletions(-) delete mode 100644 examples/fft1d_batch.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 31f57fb..b451f4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,9 +20,6 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") find_package(FFTW REQUIRED) message("-- FFTW found") -# Link argparse as static library -add_subdirectory(${CMAKE_SOURCE_DIR}/extern/argparse) - # Add sub directories add_subdirectory(api) add_subdirectory(kernels) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 5752d53..54d2189 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -32,15 +32,15 @@ typedef struct { typedef struct fpga_timing { double pcie_read_t; /**< Time to read from DDR to host using PCIe bus */ double pcie_write_t; /**< Time to write from DDR to host using PCIe bus */ - double exec_t; /**< Kernel execution time from CPU wall clock time */ - double hw_pcie_read_t; /**< HW Counter Time to read from DDR to host using PCIe bus */ - double hw_pcie_write_t; /**< HW Counter Time to write from DDR to host using PCIe bus */ - double hw_exec_t; /**< Kernel execution time from HW counters */ + double exec_t; /**< Kernel execution time */ double svm_copyin_t; /**< Time to copy in data to SVM */ double svm_copyout_t; /**< Time to copy data out of SVM */ bool valid; /**< Represents true signifying valid execution */ } fpga_t; +#ifdef __cplusplus +extern "C" { +#endif /** * @brief Initialize FPGA * @param platform_name: name of the OpenCL platform @@ -83,7 
+83,7 @@ extern void* fftfpgaf_complex_malloc(size_t sz); * @param iter : number of iterations of the N point FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpga_c2c_1d(int N, const double2 *inp, double2 *out, bool inv, int iter); +extern fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, unsigned iter); /** * @brief compute an out-of-place single precision complex 1D-FFT on the FPGA @@ -94,11 +94,18 @@ extern fpga_t fftfpga_c2c_1d(int N, const double2 *inp, double2 *out, bool inv, * @param iter : number of iterations of the N point FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_1d(int N, const float2 *inp, float2 *out, bool inv, int iter); - -extern fpga_t fftfpgaf_c2c_1d_svm(int N, const float2 *inp, float2 *out, bool inv, int batch); +extern fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool inv, unsigned iter); -extern fpga_t fftfpgaf_c2c_1d_batch(int N, const float2 *inp, float2 *out, bool inv, unsigned how_many); +/** + * @brief compute an out-of-place single precision complex 1D-FFT on the FPGA + * @param N : integer pointer to size of FFT3d + * @param inp : float2 pointer to input data of size N + * @param out : float2 pointer to output data of size N + * @param inv : int toggle to activate backward FFT + * @param iter : number of iterations of the N point FFT + * @return fpga_t : time taken in milliseconds for data transfers and execution + */ +extern fpga_t fftfpgaf_c2c_1d_svm(unsigned N, const float2 *inp, float2 *out, bool inv, unsigned batch); /** * @brief compute an out-of-place single precision complex 2D-FFT using the BRAM of the FPGA @@ -178,5 +185,8 @@ extern fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, boo */ extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool inv, int how_many); +#ifdef __cplusplus +} +#endif #endif 
diff --git a/api/src/fft1d.c b/api/src/fft1d.c index 900bddb..4991261 100644 --- a/api/src/fft1d.c +++ b/api/src/fft1d.c @@ -17,15 +17,15 @@ /** * \brief compute an out-of-place double precision complex 1D-FFT on the FPGA - * \param N : integer pointer to size of FFT3d + * \param N : unsigned integer to the number of points in 1D FFT * \param inp : double2 pointer to input data of size N * \param out : double2 pointer to output data of size N * \param inv : int toggle to activate backward FFT * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpga_c2c_1d(int N, const double2 *inp, double2 *out, bool inv, int batch){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; +fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, unsigned batch){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel fetch_kernel = NULL, fft_kernel = NULL; cl_int status = 0; @@ -49,22 +49,17 @@ fpga_t fftfpga_c2c_1d(int N, const double2 *inp, double2 *out, bool inv, int bat // Copy data from host to device cl_event writeBuf_event; - fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(double2) * N * batch, inp, 0, NULL, &writeBuf_event); + checkError(status, "Failed to copy data to device"); status = clFinish(queue1); checkError(status, "failed to finish writing buffer using PCIe"); - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; - clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); - fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + fft_time.pcie_write_t = 
(cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -94,8 +89,6 @@ fpga_t fftfpga_c2c_1d(int N, const double2 *inp, double2 *out, bool inv, int bat // Measure execution time cl_event exec_event; - fft_time.exec_t = getTimeinMilliSec(); - // FFT1d kernel is the SWI kernel status = clEnqueueTask(queue1, fft_kernel, 0, NULL, &exec_event); checkError(status, "Failed to launch fft1d kernel"); @@ -110,31 +103,25 @@ fpga_t fftfpga_c2c_1d(int N, const double2 *inp, double2 *out, bool inv, int bat checkError(status, "Failed to finish queue2"); // Record execution time - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - cl_ulong kernel_start = 0, kernel_end = 0; - clGetEventProfilingInfo(exec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(exec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); // Copy results from device to host cl_event readBuf_event; - fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, &readBuf_event); + checkError(status, "Failed to copy data from device"); status = clFinish(queue1); checkError(status, "failed to finish reading buffer using PCIe"); - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - cl_ulong readBuf_start = 0, readBuf_end = 0; clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); - fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + 
fft_time.pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); // Cleanup if (d_inData) @@ -152,16 +139,16 @@ fpga_t fftfpga_c2c_1d(int N, const double2 *inp, double2 *out, bool inv, int bat /** * \brief compute an out-of-place single precision complex 1D-FFT on the FPGA - * \param N : integer pointer to size of FFT3d + * \param N : unsigned integer to the number of points in FFT1d * \param inp : float2 pointer to input data of size N * \param out : float2 pointer to output data of size N * \param inv : true for backward transforms * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_1d(int N, const float2 *inp, float2 *out, bool inv, int batch){ +fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool inv, unsigned batch){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel kernel1 = NULL, kernel2 = NULL; cl_int status = 0; @@ -187,16 +174,12 @@ fpga_t fftfpgaf_c2c_1d(int N, const float2 *inp, float2 *out, bool inv, int batc checkError(status, "Failed to allocate output device buffer\n"); // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * batch, inp, 0, NULL, NULL); + checkError(status, "Failed to copy data to device"); status = clFinish(queue1); checkError(status, "failed to finish writing buffer using PCIe"); - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -222,15 +205,14 @@ fpga_t fftfpgaf_c2c_1d(int N, const float2 *inp, float2 *out, bool inv, int batc size_t ls = N/8; size_t gs = batch * ls; + cl_event startExec_event, endExec_event; // Measure execution time - fft_time.exec_t = 
getTimeinMilliSec(); - // Launch the kernel - we launch a single work item hence enqueue a task // FFT1d kernel is the SWI kernel - status = clEnqueueTask(queue1, kernel2, 0, NULL, NULL); + status = clEnqueueTask(queue1, kernel2, 0, NULL, &endExec_event); checkError(status, "Failed to launch fft1d kernel"); - status = clEnqueueNDRangeKernel(queue2, kernel1, 1, NULL, &gs, &ls, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(queue2, kernel1, 1, NULL, &gs, &ls, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); // Wait for command queue to complete pending events @@ -240,18 +222,20 @@ fpga_t fftfpgaf_c2c_1d(int N, const float2 *inp, float2 *out, bool inv, int batc checkError(status, "Failed to finish queue2"); // Record execution time - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); // Copy results from device to host - fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); + checkError(status, "Failed to copy data from device"); status = clFinish(queue1); checkError(status, "failed to finish reading buffer using PCIe"); - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - // Cleanup if (d_inData) clReleaseMemObject(d_inData); @@ -269,17 +253,17 @@ fpga_t fftfpgaf_c2c_1d(int N, const float2 *inp, float2 *out, bool inv, int batc /** * \brief compute an out-of-place single precision complex 1D-FFT on the FPGA using Shared Virtual Memory for data transfers between host's main memory and FPGA - * \param N : integer 
pointer to size of FFT3d + * \param N : unsigned integer to the number of points in 1D FFT * \param inp : float2 pointer to input data of size N * \param out : float2 pointer to output data of size N * \param inv : int toggle to activate backward FFT * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_1d_svm(int N, const float2 *inp, float2 *out, bool inv, int batch){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; +fpga_t fftfpgaf_c2c_1d_svm(unsigned N, const float2 *inp, float2 *out, bool inv, unsigned batch){ + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; - int num_pts = N * batch; + unsigned num_pts = N * batch; // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ @@ -348,11 +332,12 @@ fpga_t fftfpgaf_c2c_1d_svm(int N, const float2 *inp, float2 *out, bool inv, int size_t ls = N/8; size_t gs = batch * ls; - fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue1, fft_kernel, 0, NULL, NULL); + cl_event startExec_event, endExec_event; + + status = clEnqueueTask(queue1, fft_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch fetch kernel"); - status = clEnqueueNDRangeKernel(queue2, fetch_kernel, 1, NULL, &gs, &ls, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(queue2, fetch_kernel, 1, NULL, &gs, &ls, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); status = clFinish(queue2); @@ -360,7 +345,12 @@ fpga_t fftfpgaf_c2c_1d_svm(int N, const float2 *inp, float2 *out, bool inv, int status = clFinish(queue1); checkError(status, "failed to finish"); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, 
sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); @@ -390,231 +380,3 @@ fpga_t fftfpgaf_c2c_1d_svm(int N, const float2 *inp, float2 *out, bool inv, int fft_time.valid = 1; return fft_time; } - -/** - * \brief compute an out-of-place batched single precision complex 1D-FFT on the FPGA - * \param N : integer pointer to size of FFT3d - * \param inp : float2 pointer to input data of size N - * \param out : float2 pointer to output data of size N - * \param inv : true for backward transforms - * \param how_many : number of batched executions of 1D FFT - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_1d_batch(int N, const float2 *inp, float2 *out, bool inv, unsigned how_many){ - - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - cl_kernel fetch_kernel = NULL, fft_kernel = NULL; - cl_int status = 0; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 1)){ - return fft_time; - } - - queue_setup(); - - // Device Buffers - cl_mem d_inData[3], d_outData[3]; - d_inData[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_inData[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_inData[2] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_outData[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate output device 
buffer\n"); - - d_outData[1] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - d_outData[2] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - cl_event writeEvent[2]; - - fft_time.pcie_write_t = getTimeinMilliSec(); - - clEnqueueWriteBuffer(queue1, d_inData[0], CL_TRUE, 0, sizeof(float2) * N, inp, 0, NULL, NULL); - - for(size_t i = 1; i < how_many; i++){ - clEnqueueWriteBuffer(queue2, d_inData[i%2], CL_FALSE, 0, sizeof(float2) * N, &inp[i * N], 0, NULL, &writeEvent[0]); - - status = clEnqueueReadBuffer(queue3, d_outData[(i-1)%2], CL_FALSE, 0, sizeof(float2) * N, &out[(i-1) * N], 0, NULL, &writeEvent[1]); - checkError(status, "Failed to read"); - - clFinish(queue2); - clFinish(queue3); - - clWaitForEvents(2, writeEvent); - clReleaseEvent(writeEvent[0]); - clReleaseEvent(writeEvent[1]); - } - - status = clEnqueueReadBuffer(queue3, d_outData[(how_many-1) % 2], CL_FALSE, 0, sizeof(float2) * N, &out[(how_many - 1) * N], 0, NULL, &writeEvent[0]); - checkError(status, "Failed to read"); - - clFinish(queue3); - clWaitForEvents(1, &writeEvent[0]); - clReleaseEvent(writeEvent[0]); - - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - // Cleanup - if (d_inData[0]) - clReleaseMemObject(d_inData[0]); - if (d_inData[1]) - clReleaseMemObject(d_inData[1]); - if (d_inData[2]) - clReleaseMemObject(d_inData[2]); - - if (d_outData[0]) - clReleaseMemObject(d_outData[0]); - if (d_outData[1]) - clReleaseMemObject(d_outData[1]); - if (d_outData[2]) - clReleaseMemObject(d_outData[2]); - - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - if(fft_kernel) - clReleaseKernel(fft_kernel); - - queue_cleanup(); - - fft_time.valid = 1; - return fft_time; - -} - -/* -fpga_t fftfpgaf_c2c_1d_batch(int N, float2 *inp, float2 *out, bool inv, 
unsigned how_many){ - - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - cl_kernel fetch_kernel = NULL, fft_kernel = NULL; - cl_mem d_inData[2], d_outData[2]; - cl_int status = 0; - int batch = 1; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 1)){ - return fft_time; - } - - queue_setup(); - - // Device Buffers - d_inData[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_inData[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate input device buffer\n"); - - d_outData[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - d_outData[1] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // Kernels - fetch_kernel = clCreateKernel(program, "fetch", &status); - checkError(status, "Failed to create fetch kernel"); - - fft_kernel = clCreateKernel(program, "fft1d", &status); - checkError(status, "Failed to create fft1d kernel"); - - // Set the kernel arguments - cl_event readEvent[how_many], writeEvent[how_many]; - cl_event kernelEvent1[how_many], kernelEvent2[how_many]; - cl_event wr_dep[2], rd_dep[2], kernel_dep[2]; - - int inverse_int = (int)inv; - size_t ls = N / 8; - size_t gs = ls; - //size_t gs = batch * ls; - - fft_time.pcie_write_t = getTimeinMilliSec(); - - for(size_t i = 0; i < how_many; i++){ - if(i < 2){ - clEnqueueWriteBuffer(queue1, d_inData[i%2], CL_FALSE, 0, sizeof(float2) * N, &inp[i * N], 0, NULL, &writeEvent[i]); - clFlush(queue1); - - status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem *), &d_inData[i%2]); - 
checkError(status, "Failed to set fetch kernel arg 0"); - status = clSetKernelArg(fft_kernel, 0, sizeof(cl_mem *), &d_outData[i%2]); - checkError(status, "Failed to set fft kernel arg 0"); - status = clSetKernelArg(fft_kernel, 1, sizeof(cl_int), &batch); - checkError(status, "Failed to set fft kernel arg 1"); - status = clSetKernelArg(fft_kernel, 2, sizeof(cl_int), &inverse_int); - checkError(status, "Failed to set fft kernel arg 2"); - - status = clEnqueueTask(queue2, fft_kernel, 1, &writeEvent[i], &kernelEvent1[i]); - checkError(status, "Failed to launch fft1d kernel"); - - status = clEnqueueNDRangeKernel(queue3, fetch_kernel, 1, NULL, &gs, &ls, 1, &writeEvent[i], &kernelEvent2[i]); - checkError(status, "Failed to launch fetch kernel"); - - clFlush(queue2); - clFlush(queue3); - } - else{ - wr_dep[0] = kernelEvent1[i - 2]; - wr_dep[1] = kernelEvent2[i - 2]; - - clEnqueueWriteBuffer(queue1, d_inData[i%2], CL_FALSE, 0, sizeof(float2) * N, &inp[i * N], 2, wr_dep, &writeEvent[i]); - clFlush(queue1); - - kernel_dep[0] = writeEvent[i]; - kernel_dep[1] = readEvent[i-2]; - - status = clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem *), &d_inData[i%2]); - checkError(status, "Failed to set fetch kernel arg 0"); - status = clSetKernelArg(fft_kernel, 0, sizeof(cl_mem *), &d_outData[i%2]); - checkError(status, "Failed to set fft kernel arg 0"); - status = clSetKernelArg(fft_kernel, 1, sizeof(cl_int), &batch); - checkError(status, "Failed to set fft kernel arg 1"); - status = clSetKernelArg(fft_kernel, 2, sizeof(cl_int), &inverse_int); - checkError(status, "Failed to set fft kernel arg 2"); - - status = clEnqueueTask(queue2, fft_kernel, 2, kernel_dep, &kernelEvent1[i]); - checkError(status, "Failed to launch fft1d kernel"); - - status = clEnqueueNDRangeKernel(queue3, fetch_kernel, 1, NULL, &gs, &ls, 2, kernel_dep, &kernelEvent2[i]); - checkError(status, "Failed to launch fetch kernel"); - - clFlush(queue2); - clFlush(queue3); - } - rd_dep[0] = kernelEvent1[i]; - rd_dep[1] = 
kernelEvent2[i]; - - status = clEnqueueReadBuffer(queue4, d_outData[i%2], CL_FALSE, 0, sizeof(float2) * N, &out[i*N], 2, rd_dep, &readEvent[i]); - checkError(status, "Failed to read"); - clFlush(queue4); - } - - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - // Cleanup - if (d_inData[0]) - clReleaseMemObject(d_inData[0]); - if (d_inData[1]) - clReleaseMemObject(d_inData[1]); - if (d_outData[0]) - clReleaseMemObject(d_outData[0]); - if (d_outData[1]) - clReleaseMemObject(d_outData[1]); - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - if(fft_kernel) - clReleaseKernel(fft_kernel); - - queue_cleanup(); - - fft_time.valid = 1; - return fft_time; -} -*/ \ No newline at end of file diff --git a/api/src/fft2d.c b/api/src/fft2d.c index e68f908..4cec976 100644 --- a/api/src/fft2d.c +++ b/api/src/fft2d.c @@ -25,7 +25,7 @@ * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0}; cl_kernel fetch_kernel = NULL, fft_kernel = NULL, transpose_kernel = NULL; cl_int status = 0; int mangle_int = 0; @@ -52,22 +52,18 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv){ // Copy data from host to device cl_event writeBuf_event; - fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N, inp, 0, NULL, &writeBuf_event); + checkError(status, "Failed to copy data to device"); status = clFinish(queue1); checkError(status, "failed to finish writing buffer using PCIe"); - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); 
clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); - fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + fft_time.pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -81,9 +77,6 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv){ checkError(status, "Failed to create kernel"); cl_event startExec_event[2], endExec_event[2]; - // Record execution time - fft_time.exec_t = getTimeinMilliSec(); - // Loop twice over the kernels for (size_t i = 0; i < 2; i++) { @@ -124,36 +117,31 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv){ checkError(status, "failed to finish"); } - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - cl_ulong kernel_start = 0, kernel_end = 0; clGetEventProfilingInfo(startExec_event[0], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event[0], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); clGetEventProfilingInfo(startExec_event[1], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event[1], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t += (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.exec_t += (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); // Copy results from device to host cl_event readBuf_event; - fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N, out, 0, NULL, &readBuf_event); + checkError(status, "Failed to copy data from device"); status = 
clFinish(queue1); checkError(status, "failed to finish reading buffer using PCIe"); - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - cl_ulong readBuf_start = 0, readBuf_end = 0; clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); - fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + fft_time.pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); // Cleanup if (d_inData) @@ -184,7 +172,7 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv){ * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int how_many){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; cl_kernel fetch_kernel = NULL, store_kernel = NULL; cl_kernel transpose_kernel = NULL; @@ -223,22 +211,19 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, boo // Copy data from host to device cl_event writeBuf_event; - fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, &writeBuf_event); + checkError(status, "Failed to copy data to device"); status = clFinish(queue1); checkError(status, "failed to finish"); - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, 
sizeof(cl_ulong), &writeBuf_end, NULL); - fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + fft_time.pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -287,8 +272,6 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, boo // Kernel Execution cl_event startExec_event, endExec_event; - - fft_time.exec_t = getTimeinMilliSec(); status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); @@ -315,32 +298,28 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, boo checkError(status, "failed to finish queue4"); status = clFinish(queue5); checkError(status, "failed to finish queue5"); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; cl_ulong kernel_start = 0, kernel_end = 0; clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); // Copy results from device to host cl_event readBuf_event; - fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, &readBuf_event); + checkError(status, "Failed to copy data from device"); status = clFinish(queue1); checkError(status, "failed to finish reading buffer using PCIe"); - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - cl_ulong readBuf_start = 0, readBuf_end = 0; clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); 
clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); - fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + fft_time.pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); queue_cleanup(); @@ -376,7 +355,7 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, boo * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_2d_bram_svm(int N, const float2 *inp, float2 *out, bool inv, int how_many){ - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = how_many * N * N; @@ -472,8 +451,6 @@ fpga_t fftfpgaf_c2c_2d_bram_svm(int N, const float2 *inp, float2 *out, bool inv, checkError(status, "Failed to set store kernel arg"); cl_event startExec_event, endExec_event; - - fft_time.exec_t = getTimeinMilliSec(); status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); @@ -500,13 +477,12 @@ fpga_t fftfpgaf_c2c_2d_bram_svm(int N, const float2 *inp, float2 *out, bool inv, checkError(status, "failed to finish queue3"); status = clFinish(queue4); checkError(status, "failed to finish queue4"); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; cl_ulong kernel_start = 0, kernel_end = 0; clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); diff --git a/api/src/fft3d.c b/api/src/fft3d.c index 38aa71d..cfdcb34 100644 --- 
a/api/src/fft3d.c +++ b/api/src/fft3d.c @@ -14,6 +14,7 @@ #include "svm.h" #include "opencl_utils.h" #include "misc.h" +#include "/opt/intelFPGA_pro/19.2.0/hld/board/custom_platform_toolkit/mmd/aocl_mmd.h" #define WR_GLOBALMEM 0 #define RD_GLOBALMEM 1 @@ -32,7 +33,7 @@ * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, bool interleaving) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; cl_kernel fft3da_kernel = NULL, fft3db_kernel = NULL, fft3dc_kernel = NULL; @@ -45,7 +46,7 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, boo } #ifdef VERBOSE - printf("Launching%s 3d FFT transform \n", inv ? " inverse":""); + printf("Launching%s 3d FFT transform using BRAM for 3D Transpose\n", inv ? " inverse":""); #endif queue_setup(); @@ -68,23 +69,17 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, boo checkError(status, "Failed to allocate output device buffer\n"); cl_event writeBuf_event; - // Copy data from host to device - fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * N * N, inp, 0, NULL, &writeBuf_event); + checkError(status, "Failed to copy data to device"); status = clFinish(queue1); checkError(status, "failed to finish"); - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); - cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; - clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); - fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + fft_time.pcie_write_t = (cl_double)(writeBuf_end - 
writeBuf_start) * (cl_double)(1e-06); // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -126,7 +121,6 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, boo // Kernel Execution cl_event startExec_event, endExec_event; - fft_time.exec_t = getTimeinMilliSec(); status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch store transpose kernel"); @@ -164,31 +158,25 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, boo status = clFinish(queue7); checkError(status, "failed to finish"); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - cl_ulong kernel_start = 0, kernel_end = 0; clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); // Copy results from device to host cl_event readBuf_event; - fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * N * N, out, 0, NULL, &readBuf_event); - + checkError(status, "Failed to copy data from device"); status = clFinish(queue1); checkError(status, "failed to finish reading buffer using PCIe"); - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - cl_ulong readBuf_start = 0, readBuf_end = 0; clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); - fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + fft_time.pcie_read_t = 
(cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); queue_cleanup(); @@ -226,7 +214,7 @@ fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, boo * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; @@ -277,22 +265,18 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { // Copy data from host to device cl_event writeBuf_event; - fft_time.pcie_write_t = getTimeinMilliSec(); - status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, &writeBuf_event); + checkError(status, "Failed to copy data to device"); status = clFinish(queue1); - checkError(status, "failed to finish"); - - fft_time.pcie_write_t = getTimeinMilliSec() - fft_time.pcie_write_t; - checkError(status, "Failed to copy data to device"); + checkError(status, "Failed to finish data transfer to device"); cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &writeBuf_end, NULL); - fft_time.hw_pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); + fft_time.pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); checkError(status, "Failed to set fetch1 kernel arg"); @@ -314,8 +298,6 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { // Kernel Execution cl_event startExec_event, endExec_event; - - fft_time.exec_t = getTimeinMilliSec(); status = clEnqueueTask(queue7, store2_kernel, 0, NULL, &endExec_event); checkError(status, 
"Failed to launch transpose kernel"); @@ -342,46 +324,39 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); - status = clFinish(queue7); + status = clFinish(queue1); checkError(status, "failed to finish"); - status = clFinish(queue6); + status = clFinish(queue2); checkError(status, "failed to finish"); - status = clFinish(queue5); + status = clFinish(queue3); checkError(status, "failed to finish"); status = clFinish(queue4); checkError(status, "failed to finish"); - status = clFinish(queue3); + status = clFinish(queue5); checkError(status, "failed to finish"); - status = clFinish(queue2); + status = clFinish(queue6); checkError(status, "failed to finish"); - status = clFinish(queue1); + status = clFinish(queue7); checkError(status, "failed to finish"); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - cl_ulong kernel_start = 0, kernel_end = 0; - clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); // Copy results from device to host cl_event readBuf_event; - fft_time.pcie_read_t = getTimeinMilliSec(); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * num_pts, out, 0, NULL, &readBuf_event); - + checkError(status, "Failed to copy data from device to host"); status = clFinish(queue1); checkError(status, "failed to finish reading DDR using PCIe"); - fft_time.pcie_read_t = getTimeinMilliSec() - fft_time.pcie_read_t; - checkError(status, "Failed to copy data from device"); - cl_ulong readBuf_start = 0, readBuf_end = 0; clGetEventProfilingInfo(readBuf_event, 
CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &readBuf_start, NULL); clGetEventProfilingInfo(readBuf_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &readBuf_end, NULL); - fft_time.hw_pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); + fft_time.pcie_read_t = (cl_double)(readBuf_end - readBuf_start) * (cl_double)(1e-06); queue_cleanup(); @@ -426,7 +401,7 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, bool interleaving) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, false}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, false}; cl_int status = 0; unsigned num_pts = N * N * N; @@ -478,8 +453,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); size_t num_bytes = num_pts * sizeof(float2); - double svm_copyin_t = 0.0; - svm_copyin_t = getTimeinMilliSec(); + double svm_copyin_t = getTimeinMilliSec(); status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); @@ -533,8 +507,6 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, checkError(status, "Failed to set store kernel arg"); cl_event startExec_event, endExec_event; - - fft_time.exec_t = getTimeinMilliSec(); status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch transpose kernel"); @@ -579,14 +551,12 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, status = clFinish(queue1); checkError(status, "failed to finish"); - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - cl_ulong kernel_start = 0, kernel_end = 0; 
clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); double svm_copyout_t = 0.0; svm_copyout_t = getTimeinMilliSec(); @@ -645,7 +615,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int how_many) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; int num_pts = N * N * N; @@ -1120,7 +1090,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool inv, int how_many) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, false}; + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, false}; cl_int status = 0; // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 2 - BATCH int mode_transpose = WR_GLOBALMEM; @@ -1166,6 +1136,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool double svm_copyin_t = 0.0; float2 *h_inData[how_many], *h_outData[how_many]; for(size_t i = 0; i < how_many; i++){ + h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); h_outData[i] = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); @@ -1182,7 +1153,6 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool checkError(status, "Failed to unmap input data"); fft_time.svm_copyin_t += getTimeinMilliSec() - 
svm_copyin_t; - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); @@ -1342,7 +1312,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.hw_exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); double svm_copyout_t = 0.0; for(size_t i = 0; i < how_many; i++){ diff --git a/examples/fft.cpp b/examples/fft.cpp index f2c05a2..0f83177 100644 --- a/examples/fft.cpp +++ b/examples/fft.cpp @@ -1,4 +1,6 @@ #include +#include +#include "fftfpga/fftfpga.h" #include "helper.hpp" using namespace std; @@ -7,7 +9,80 @@ int main(int argc, char* argv[]){ CONFIG config; parse_args(argc, argv, config); - print_config(config); + + const char* platform; + if(config.emulate) + platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + else + platform = "Intel(R) FPGA SDK for OpenCL(TM)"; + + bool use_svm = false; + int isInit = fpga_initialize(platform, config.path.data(), use_svm); + if(isInit != 0){ + cerr << "FPGA initialization error\n"; + return EXIT_FAILURE; + } + + const unsigned num = config.num; + const unsigned sz = pow(num, config.dim); + float2 *inp = new float2[sz](); + float2 *out = new float2[sz](); + fpga_t runtime[config.iter]; + + try{ + create_data(inp, sz); + + const unsigned inv = config.inv; + const bool burst = config.burst; + + for(unsigned i = 0; i < config.iter; i++){ + switch(config.dim) { + case 1: runtime[i] = fftfpgaf_c2c_1d(num, inp, out, inv, config.batch); + break; + case 2: { + if(config.use_bram) + runtime[i] = fftfpgaf_c2c_2d_bram(num, inp, out, inv, burst, config.batch); + else + 
runtime[i] = fftfpgaf_c2c_2d_ddr(num, inp, out, inv); + break; + } + case 3:{ + if(config.use_bram) + runtime[i] = fftfpgaf_c2c_3d_bram(num, inp, out, inv, burst); + else if(!config.use_bram && (config.batch > 1)) + runtime[i] = fftfpgaf_c2c_3d_ddr_batch(num, inp, out, inv, burst, config.batch); + else + runtime[i] = fftfpgaf_c2c_3d_ddr(num, inp, out, inv); + break; + } + default: + break; + } + + if(!config.noverify){ + if(!verify_fftwf(inp, out, config)){ + char excp[80]; + snprintf(excp, 80, "Iter %u: FPGA result incorrect in comparison to FFTW\n", i); + throw runtime_error(excp); + } + } + } + } + catch(const char* msg){ + cerr << msg << endl; + fpga_final(); + delete inp; + delete out; + return EXIT_FAILURE; + } + + perf_measures(config, runtime); + + // destroy fpga state + fpga_final(); + + delete inp; + delete out; return EXIT_SUCCESS; } \ No newline at end of file diff --git a/examples/fft1d_batch.c b/examples/fft1d_batch.c deleted file mode 100644 index c3d8e30..0000000 --- a/examples/fft1d_batch.c +++ /dev/null @@ -1,130 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" -#include "verify_fftw.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 64, dim = 1, iter = 1, batch = 1; - - bool use_bram = false, sp = true, inv = false, use_svm = false, interleaving = false; - bool status = true, use_emulator = false; - - char *path = "fft1d_emulate.aocx"; - const char *platform = "Intel(R) FPGA"; - - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - 
OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_INTEGER('c',"batch", &batch, "Batch"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - return EXIT_FAILURE; - } - - size_t inp_sz = sizeof(float2) * N * batch; - - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - // find the average of iterations of batched 1D FFTs - // random data every iteration and every batch - for(size_t i = 0; i < iter; i++){ - - status = fftf_create_data(inp, N * batch); - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_1d_batch(N, inp, out, inv, batch); - total_api_time += getTimeinMilliseconds() - temp_timer; - -#ifdef USE_FFTW - if(!verify_fftwf(out, inp, N, 1, inv, batch)){ - fprintf(stderr, "1d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } -#endif - // TODO: Verification of bit reversed output - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - avg_rd += timing.pcie_read_t; - avg_wr += 
timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - } - - // destroy FFT input and output - free(inp); - free(out); - - // destroy data - fpga_final(); - - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - return EXIT_SUCCESS; -} \ No newline at end of file diff --git a/examples/helper.cpp b/examples/helper.cpp index 2ec4d57..691c2be 100644 --- a/examples/helper.cpp +++ b/examples/helper.cpp @@ -1,6 +1,9 @@ #include +#include +#include #include "cxxopts.hpp" #include "helper.hpp" +#include "fftfpga/fftfpga.h" using namespace std; @@ -39,6 +42,7 @@ void parse_args(int argc, char* argv[], CONFIG &config){ ("c, batch", "Number of batches of FFT calculations in FPGA", cxxopts::value()->default_value("1") ) ("t, burst", "Toggle to use burst interleaved global memory accesses in FPGA", cxxopts::value()->default_value("false") ) ("m, use_bram", "Toggle to use BRAM instead of DDR for 3D Transpose ", cxxopts::value()->default_value("false") ) + ("e, emulate", "Toggle to enable emulation ", cxxopts::value()->default_value("false") ) ("h,help", "Print usage"); auto opt = options.parse(argc, argv); @@ -56,6 +60,7 @@ void parse_args(int argc, char* argv[], CONFIG &config){ config.batch = opt["batch"].as(); config.burst = opt["burst"].as(); config.use_bram = opt["use_bram"].as(); + config.emulate = opt["emulate"].as(); if(opt.count("path")){ config.path = opt["path"].as(); @@ -82,5 +87,137 @@ void 
print_config(CONFIG config){ printf("Iterations : %d \n", config.iter); printf("Transpose3D : %s \n", config.use_bram ? "BRAM":"DDR"); printf("Burst Interleaving : %s \n", config.burst ? "Yes":"No"); + printf("Emulation : %s \n", config.emulate ? "Yes":"No"); printf("--------------------------------------------\n\n"); +} + +/** + * \brief Verify by comparing FFT computed in FPGA with FFTW + * \param verify: float2 pointer for fftw cpu computation + * \param fpga_out: float2 pointer output from FPGA computation to verify + * \param config: struct of program state + * \return true if verification passed + */ +bool verify_fftwf(float2 *verify, float2 *fpgaout, const CONFIG config){ + + unsigned sz = pow(config.num, config.dim); + unsigned total_sz = config.batch * sz; + + fftwf_complex *fftw_data = fftwf_alloc_complex(sz); + + for(size_t i = 0; i < total_sz; i++){ + fftw_data[i][0] = verify[i].x; + fftw_data[i][1] = verify[i].y; + } + + //const int n[] = {N, N, N}; + int *n = (int*)calloc(config.num * config.dim , sizeof(int)); + for(unsigned i = 0; i < config.dim; i++){ + n[i] = config.num; + } + int idist = sz, odist = sz; + int istride = 1, ostride = 1; // contiguous in memory + + fftwf_plan plan; + if(config.inv){ + plan = fftwf_plan_many_dft(config.dim, n, config.batch, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_BACKWARD, FFTW_ESTIMATE); + } + else{ + plan = fftwf_plan_many_dft(config.dim, n, config.batch, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_FORWARD, FFTW_ESTIMATE); + } + + fftwf_execute(plan); + + // Verification using SNR + float mag_sum = 0, noise_sum = 0, magnitude, noise; + for (size_t i = 0; i < total_sz; i++) { + magnitude = fftw_data[i][0] * fftw_data[i][0] + \ + fftw_data[i][1] * fftw_data[i][1]; + noise = (fftw_data[i][0] - fpgaout[i].x) \ + * (fftw_data[i][0] - fpgaout[i].x) + + (fftw_data[i][1] - fpgaout[i].y) * (fftw_data[i][1] - fpgaout[i].y); + + mag_sum += magnitude; + 
noise_sum += noise; + } + +#ifndef NDEBUG + printf("\nFFTW and FFTFPGA results comparison: \n"); + for(unsigned i = 0; i < total_sz; i++){ + printf("%u : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); + } + printf("\n\n"); +#endif + + float db = 10 * log(mag_sum / noise_sum) / log(10.0); + + fftwf_free(fftw_data); + free(n); + + fftwf_destroy_plan(plan); + + // if SNR greater than 120, verification passes + if(db > 120) + return true; + else{ + printf("\tSignal to noise ratio on output sample: %f --> %s\n\n", db, "FAILED"); + return false; + } +} + +/** + * \brief print time taken for fpga and fftw runs + * \param config: custom structure of variables storing config values + * \param runtime: iteration number of fpga timing measurements + * \param total_api_time: time taken to call iter times the host code + */ + +void perf_measures(const CONFIG config, fpga_t *runtime){ + + fpga_t avg_runtime = {0.0, 0.0, 0.0, 0.0, 0.0, 0}; + for(unsigned i = 0; i < config.iter; i++){ + avg_runtime.exec_t += runtime[i].exec_t; + avg_runtime.pcie_read_t += runtime[i].pcie_read_t; + avg_runtime.pcie_write_t += runtime[i].pcie_write_t; + } + avg_runtime.exec_t = avg_runtime.exec_t / config.iter; + avg_runtime.pcie_read_t = avg_runtime.pcie_read_t / config.iter; + avg_runtime.pcie_write_t = avg_runtime.pcie_write_t / config.iter; + + fpga_t variance = {0.0, 0.0, 0.0, 0.0, 0.0, 0}; + fpga_t sd = {0.0, 0.0, 0.0, 0.0, 0.0, 0}; + for(unsigned i = 0; i < config.iter; i++){ + variance.exec_t += pow(runtime[i].exec_t - avg_runtime.exec_t, 2); + variance.pcie_read_t += pow(runtime[i].pcie_read_t - avg_runtime.pcie_read_t, 2); + variance.pcie_write_t += pow(runtime[i].pcie_write_t - avg_runtime.pcie_write_t, 2); + } + sd.exec_t = variance.exec_t / config.iter; + sd.pcie_read_t = variance.pcie_read_t / config.iter; + sd.pcie_write_t = variance.pcie_write_t / config.iter; + + double avg_total_runtime = avg_runtime.exec_t + 
avg_runtime.pcie_write_t + avg_runtime.pcie_read_t; + + double gpoints_per_sec = (config.batch * pow(config.num, config.dim)) / (avg_runtime.exec_t * 1e-3 * 1024 * 1024); + + double gBytes_per_sec = gpoints_per_sec * 8; // bytes + + double gflops = config.batch * config.dim * 5 * pow(config.num, config.dim) * (log((double)config.num)/log((double)2))/(avg_runtime.exec_t * 1e-3 * 1024*1024*1024); + + printf("\n\n------------------------------------------\n"); + printf("Measurements \n"); + printf("--------------------------------------------\n"); + printf("%s", config.iter>1 ? "Average Measurements of iterations\n":""); + printf("PCIe Write = %.4lfms\n", avg_runtime.pcie_write_t); + printf("Kernel Execution = %.4lfms\n", avg_runtime.exec_t); + printf("Kernel Exec/Batch = %.4lfms\n", avg_runtime.exec_t / config.batch); + printf("PCIe Read = %.4lfms\n", avg_runtime.pcie_read_t); + printf("Total = %.4lfms\n", avg_total_runtime); + printf("Throughput = %.4lfGFLOPS/s | %.4lf GB/s\n", gflops, gBytes_per_sec); + if(config.iter > 1){ + printf("\n"); + printf("%s", config.iter>1 ? 
"Standard Deviations of iterations\n":""); + printf("PCIe Write = %.4lfms\n", sd.pcie_write_t); + printf("Kernel Execution = %.4lfms\n", sd.exec_t); + printf("PCIe Read = %.4lfms\n", sd.pcie_read_t); + } } \ No newline at end of file diff --git a/examples/helper.hpp b/examples/helper.hpp index e759dc8..0c72191 100644 --- a/examples/helper.hpp +++ b/examples/helper.hpp @@ -14,6 +14,7 @@ struct CONFIG{ unsigned batch; bool burst; bool use_bram; + bool emulate; }; void parse_args(int argc, char* argv[], CONFIG &config); @@ -24,4 +25,8 @@ double getTimeinMilliSec(); void create_data(float2 *inp, const unsigned num); +bool verify_fftwf(float2 *verify, float2 *fpgaout, const CONFIG config); + +void perf_measures(const CONFIG config, fpga_t *runtime); + #endif // HELPER_HPP \ No newline at end of file diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index 0636a95..ae8298f 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -61,6 +61,13 @@ separate_arguments(REP_FLAGS) set(PROF_FLAGS "-profile=all" CACHE STRING "AOC profile flags") separate_arguments(PROF_FLAGS) +if(DEFINED ENV{LMOD_FAMILY_BSP_VERSION}) + set(BSP_VERSION "$ENV{LMOD_FAMILY_BSP_VERSION}") + message("-- BSP Version: ${BSP_VERSION}") +else() + message(ERROR, "No BSP Version Found") +endif() + if (INTELFPGAOPENCL_FOUND) add_subdirectory(fft1d) add_subdirectory(fft2d) diff --git a/kernels/cmake/genKernelTargets.cmake b/kernels/cmake/genKernelTargets.cmake index 78deba0..03410a2 100644 --- a/kernels/cmake/genKernelTargets.cmake +++ b/kernels/cmake/genKernelTargets.cmake @@ -23,7 +23,7 @@ function(gen_fft_targets) set(PROF_BSTREAM "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/prof_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocx") set(SYN_BSTREAM - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/syn_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocx") + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BSP_VERSION}/syn_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocx") # Emulation Target add_custom_command(OUTPUT 
${EMU_BSTREAM} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f215603..bf3c368 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,9 +1,6 @@ # Author: Arjun Ramaswami - cmake_minimum_required(VERSION 3.10) -add_subdirectory(../extern/gtest ${CMAKE_CURRENT_BINARY_DIR}/lib) - add_executable(test_fftfpga #${CMAKE_CURRENT_SOURCE_DIR}/test_fft_fpga.cpp test_fft_setup.cpp @@ -21,11 +18,10 @@ target_include_directories(test_fftfpga ${IntelFPGAOpenCL_INCLUDE_DIRS} ${examplesfftfpga_SOURCE_DIR}/common ${CMAKE_SOURCE_DIR}/api/src - ${CMAKE_SOURCE_DIR}/extern/argparse ) target_link_libraries(test_fftfpga PUBLIC - gtest_main gtest gmock ${IntelFPGAOpenCL_LIBRARIES} argparse fftfpga m + gtest_main gtest gmock ${IntelFPGAOpenCL_LIBRARIES} fftfpga m ) if(FFTW_FOUND) From 16f633e4796f874d883ad6d47bf04f5b744ad713 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 4 Oct 2021 11:49:13 +0200 Subject: [PATCH 45/76] verify bitrev fft1d, using opt bram fft3d --- CMakeLists.txt | 4 +- api/CMakeLists.txt | 32 +- api/src/fft1d.c | 29 +- api/src/fftfpga.c | 29 +- api/src/opencl_utils.c | 8 +- .../cmake => cmake}/genKernelTargets.cmake | 0 examples/fft.cpp | 6 +- examples/helper.cpp | 40 +- kernels/CMakeLists.txt | 2 +- kernels/fft1d/CMakeLists.txt | 2 +- kernels/fft2d/CMakeLists.txt | 4 +- kernels/fft2d/fft2d_bram.cl | 392 +++++++++++------- kernels/fft2d/fft2d_bram_opt.cl | 287 ------------- kernels/fft3d/CMakeLists.txt | 5 +- kernels/fft3d/fft3d_bram_triv.cl | 317 -------------- kernels/fft3d/fft3d_ddr_triv.cl | 389 ----------------- tests/CMakeLists.txt | 2 +- 17 files changed, 309 insertions(+), 1239 deletions(-) rename {kernels/cmake => cmake}/genKernelTargets.cmake (100%) delete mode 100644 kernels/fft2d/fft2d_bram_opt.cl delete mode 100755 kernels/fft3d/fft3d_bram_triv.cl delete mode 100755 kernels/fft3d/fft3d_ddr_triv.cl diff --git a/CMakeLists.txt b/CMakeLists.txt index b451f4d..32ae2ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 
+1,5 @@ # Author: Arjun Ramaswami -cmake_minimum_required (VERSION 3.10.3) +cmake_minimum_required (VERSION 3.10) project(fft VERSION 2.0 DESCRIPTION "OpenCL based FFT library for Intel FPGAs" LANGUAGES C CXX) @@ -20,7 +20,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") find_package(FFTW REQUIRED) message("-- FFTW found") -# Add sub directories +# sub directories add_subdirectory(api) add_subdirectory(kernels) add_subdirectory(examples) diff --git a/api/CMakeLists.txt b/api/CMakeLists.txt index 2e8cb26..13864e0 100755 --- a/api/CMakeLists.txt +++ b/api/CMakeLists.txt @@ -1,6 +1,6 @@ # Arjun Ramaswami cmake_minimum_required(VERSION 3.10) -project(fftfpga VERSION 0.3 +project(fftfpga VERSION 2.0 DESCRIPTION "APIs for FFT using FPGAs" LANGUAGES C CXX) @@ -29,32 +29,4 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${IntelFPGAOpenCL_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/include) target_link_libraries(${PROJECT_NAME} - PUBLIC ${IntelFPGAOpenCL_LIBRARIES} m) - -## -# Doxygen Build -## -option(BUILD_DOC "Build documentation" OFF) - -if(BUILD_DOC) - - find_package(Doxygen) - if(DOXYGEN_FOUND) - - # Doxygen Options - set(DOXYGEN_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/docs) - set(DOXYGEN_PROJECT_NAME "FFTFPGA") - set(DOXYGEN_PROJECT_BRIEF "OpenCL based FFT library for FPGAs") - set(DOXYGEN_GENERATE_LATEX YES) - set(DOXYGEN_OPTIMIZE_OUTPUT_FOR_C YES) - set(DOXYGEN_SHOW_FILES YES) - - doxygen_add_docs(doc_doxygen - ${PROJECT_SOURCE_DIR}/include/fftfpga/fftfpga.h - COMMENT "Generate library documentation" - ) - - else (DOXYGEN_FOUND) - message(WARNING, "Doxygen need to be installed to generate the doxygen documentation") - endif (DOXYGEN_FOUND) -endif() \ No newline at end of file + PUBLIC ${IntelFPGAOpenCL_LIBRARIES} m) \ No newline at end of file diff --git a/api/src/fft1d.c b/api/src/fft1d.c index 4991261..7313b47 100644 --- a/api/src/fft1d.c +++ b/api/src/fft1d.c @@ -34,9 +34,7 @@ fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, 
un return fft_time; } -#ifdef VERBOSE - printf("Launching%s FFT transform of %d batches \n", inv ? " inverse":"", batch); -#endif + printf("-- Launching%s 1D FFT of %d batches \n", inv ? " inverse":"", batch); queue_setup(); @@ -47,6 +45,7 @@ fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, un d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(double2) * N * batch, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); + printf("-- Copying data from host to device\n"); // Copy data from host to device cl_event writeBuf_event; status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(double2) * N * batch, inp, 0, NULL, &writeBuf_event); @@ -87,6 +86,7 @@ fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, un size_t ls = N/8; size_t gs = batch * ls; + printf("-- Executing kernels\n"); // Measure execution time cl_event exec_event; // FFT1d kernel is the SWI kernel @@ -110,6 +110,7 @@ fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, un fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); // Copy results from device to host + printf("-- Transfering results back to host\n"); cl_event readBuf_event; status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, &readBuf_event); checkError(status, "Failed to copy data from device"); @@ -157,9 +158,7 @@ fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool inv, uns return fft_time; } -#ifdef VERBOSE - printf("Launching%s FFT transform for %d batch \n", inv ? " inverse":"", batch); -#endif + printf("-- Launching%s 1D FFT of %d batches \n", inv ? 
" inverse":"", batch); queue_setup(); @@ -173,6 +172,7 @@ fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool inv, uns d_outData = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * N * batch, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); + printf("-- Copying data from host to device\n"); // Copy data from host to device status = clEnqueueWriteBuffer(queue1, d_inData, CL_TRUE, 0, sizeof(float2) * N * batch, inp, 0, NULL, NULL); checkError(status, "Failed to copy data to device"); @@ -199,12 +199,10 @@ fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool inv, uns status = clSetKernelArg(kernel2, 2, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set kernel arg 2"); - printf(inverse_int ? "\tInverse FFT" : "\tFFT"); - printf(" kernel initialization is complete.\n"); - size_t ls = N/8; size_t gs = batch * ls; + printf("-- Executing kernels\n"); cl_event startExec_event, endExec_event; // Measure execution time // Launch the kernel - we launch a single work item hence enqueue a task @@ -223,13 +221,12 @@ fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool inv, uns // Record execution time cl_ulong kernel_start = 0, kernel_end = 0; - clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); // Copy results from device to host + printf("-- Transfering results back to host\n"); status = clEnqueueReadBuffer(queue1, d_outData, CL_TRUE, 0, sizeof(float2) * N * batch, out, 0, NULL, NULL); checkError(status, "Failed to copy data from device"); @@ -270,9 +267,7 @@ fpga_t fftfpgaf_c2c_1d_svm(unsigned N, const float2 *inp, float2 *out, bool inv, return fft_time; } -#ifdef VERBOSE - printf("Launching%s 1D 
FFT transform in DDR \n", inv ? " inverse":""); -#endif + printf("-- Launching%s 1D FFT of %d batches using SVM\n", inv ? " inverse":"", batch); // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; @@ -291,10 +286,10 @@ fpga_t fftfpgaf_c2c_1d_svm(unsigned N, const float2 *inp, float2 *out, bool inv, h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + // copy data into h_inData status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); - // copy data into h_inData for(size_t i = 0; i < num_pts; i++){ h_inData[i].x = inp[i].x; h_inData[i].y = inp[i].y; @@ -303,10 +298,10 @@ fpga_t fftfpgaf_c2c_1d_svm(unsigned N, const float2 *inp, float2 *out, bool inv, status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); checkError(status, "Failed to unmap input data"); + // initialize h_outData with zeroes status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); checkError(status, "Failed to map input data"); - // copy data into h_inData for(size_t i = 0; i < num_pts; i++){ h_outData[i].x = 0.0; h_outData[i].y = 0.0; @@ -332,6 +327,7 @@ fpga_t fftfpgaf_c2c_1d_svm(unsigned N, const float2 *inp, float2 *out, bool inv, size_t ls = N/8; size_t gs = batch * ls; + printf("-- Executing\n"); cl_event startExec_event, endExec_event; status = clEnqueueTask(queue1, fft_kernel, 0, NULL, &endExec_event); @@ -373,7 +369,6 @@ fpga_t fftfpgaf_c2c_1d_svm(unsigned N, const float2 *inp, float2 *out, bool inv, if(fetch_kernel) clReleaseKernel(fetch_kernel); - if(fft_kernel) clReleaseKernel(fft_kernel); diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index ce3101e..711cd43 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -68,15 +68,11 @@ void* 
fftfpgaf_complex_malloc(size_t sz){ -3 Unable to find devices for given OpenCL platform -4 Failed to create program, file not found in path -5 Device does not support required SVM - - */ +*/ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ cl_int status = 0; -#ifdef VERBOSE - printf("\tInitializing FPGA ...\n"); -#endif - + printf("-- Initializing FPGA ...\n"); // Path to binary missing if(path == NULL || strlen(path) == 0){ return -1; @@ -93,21 +89,21 @@ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ cl_uint num_devices; devices = getDevices(platform, CL_DEVICE_TYPE_ALL, &num_devices); // Unable to find device for the OpenCL platform - printf("Number of devices: %u\n", num_devices); + printf("\n\t%u devices found\n", num_devices); if(devices == NULL){ return -3; } // use the first device. device = devices[0]; - printf(" -- Choosing first device\n"); + printf("\t\tChoosing first device by default\n"); if(use_svm){ if(!check_valid_svm_device(device)){ return -5; } else{ - printf(" -- Supports SVM \n"); + printf("\t\tDevice supports SVM \n"); svm_enabled = true; } } @@ -116,9 +112,7 @@ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ context = clCreateContext(NULL, 1, &device, NULL, NULL, &status); checkError(status, "Failed to create context"); -#ifdef VERBOSE - printf("\tGetting program binary from path %s ...\n", path); -#endif + printf("\n\tGetting program binary from path: %s\n", path); // Create the program. program = getProgramWithBinary(context, &device, 1, path); if(program == NULL) { @@ -127,9 +121,7 @@ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ return -4; } -#ifdef VERBOSE - printf("\tBuilding program ...\n"); -#endif + printf("\tBuilding the program\n\n"); // Build the program that was just created. 
status = clBuildProgram(program, 0, NULL, "", NULL, NULL); checkError(status, "Failed to build program"); @@ -141,10 +133,7 @@ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ * @brief Release FPGA Resources */ void fpga_final(){ - -#ifdef VERBOSE - printf("\tCleaning up FPGA resources ...\n"); -#endif + printf("-- Cleaning up FPGA resources ...\n"); if(program) clReleaseProgram(program); if(context) @@ -156,6 +145,7 @@ void fpga_final(){ * \brief Create a command queue for each kernel */ void queue_setup(){ + printf("-- Creating queues\n"); cl_int status = 0; // Create one command queue for each kernel. queue1 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); @@ -180,6 +170,7 @@ void queue_setup(){ * \brief Release all command queues */ void queue_cleanup() { + printf("-- Destroying queues\n"); if(queue1) clReleaseCommandQueue(queue1); if(queue2) diff --git a/api/src/opencl_utils.c b/api/src/opencl_utils.c index 38e3e78..f5c2677 100755 --- a/api/src/opencl_utils.c +++ b/api/src/opencl_utils.c @@ -50,9 +50,7 @@ cl_platform_id findPlatform(const char *platform_name){ char name_search[pl_len + 1]; // VLA tolowercase(platform_name, name_search); -#ifndef NDEBUG - printf("Num of Platforms found - %d\n", num_platforms); -#endif + printf("\t%d platforms found\n", num_platforms); // Search the platforms for the platform name passed as argument for(int i = 0; i < num_platforms; i++){ @@ -75,9 +73,7 @@ cl_platform_id findPlatform(const char *platform_name){ } tolowercase(plat_name, plat_name_lc); -#ifndef NDEBUG - printf(" %d - %s \n", i, plat_name_lc); -#endif + printf("\t\t%d: %s\n", i, plat_name_lc); if( strstr(plat_name_lc, name_search)){ cl_platform_id pid = pids[i]; free(pids); diff --git a/kernels/cmake/genKernelTargets.cmake b/cmake/genKernelTargets.cmake similarity index 100% rename from kernels/cmake/genKernelTargets.cmake rename to cmake/genKernelTargets.cmake diff --git a/examples/fft.cpp 
b/examples/fft.cpp index 0f83177..f0cfe00 100644 --- a/examples/fft.cpp +++ b/examples/fft.cpp @@ -25,7 +25,7 @@ int main(int argc, char* argv[]){ } const unsigned num = config.num; - const unsigned sz = pow(num, config.dim); + const unsigned sz = config.batch * pow(num, config.dim); float2 *inp = new float2[sz](); float2 *out = new float2[sz](); fpga_t runtime[config.iter]; @@ -77,11 +77,11 @@ int main(int argc, char* argv[]){ return EXIT_FAILURE; } - perf_measures(config, runtime); - // destroy fpga state fpga_final(); + perf_measures(config, runtime); + delete inp; delete out; return EXIT_SUCCESS; diff --git a/examples/helper.cpp b/examples/helper.cpp index 691c2be..72a5e01 100644 --- a/examples/helper.cpp +++ b/examples/helper.cpp @@ -7,6 +7,16 @@ using namespace std; +unsigned bit_reversed(unsigned x, unsigned bits) { + unsigned y = 0; + for (unsigned i = 0; i < bits; i++) { + y <<= 1; + y |= x & 1; + x >>= 1; + } + return y; +} + /** * \brief create random single precision complex floating point values * \param inp : pointer to float2 data of size N @@ -103,15 +113,14 @@ bool verify_fftwf(float2 *verify, float2 *fpgaout, const CONFIG config){ unsigned sz = pow(config.num, config.dim); unsigned total_sz = config.batch * sz; - fftwf_complex *fftw_data = fftwf_alloc_complex(sz); + fftwf_complex *fftw_data = fftwf_alloc_complex(total_sz); for(size_t i = 0; i < total_sz; i++){ fftw_data[i][0] = verify[i].x; fftw_data[i][1] = verify[i].y; } - //const int n[] = {N, N, N}; - int *n = (int*)calloc(config.num * config.dim , sizeof(int)); + int *n = (int*)calloc(config.dim , sizeof(int)); for(unsigned i = 0; i < config.dim; i++){ n[i] = config.num; } @@ -120,14 +129,35 @@ bool verify_fftwf(float2 *verify, float2 *fpgaout, const CONFIG config){ fftwf_plan plan; if(config.inv){ - plan = fftwf_plan_many_dft(config.dim, n, config.batch, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_BACKWARD, FFTW_ESTIMATE); + plan = 
fftwf_plan_many_dft(config.dim, n, config.batch, &fftw_data[0], NULL, istride, idist, &fftw_data[0], NULL, ostride, odist, FFTW_BACKWARD, FFTW_ESTIMATE); } else{ - plan = fftwf_plan_many_dft(config.dim, n, config.batch, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_FORWARD, FFTW_ESTIMATE); + plan = fftwf_plan_many_dft(config.dim, n, config.batch, &fftw_data[0], NULL, istride, idist, &fftw_data[0], NULL, ostride, odist, FFTW_FORWARD, FFTW_ESTIMATE); } fftwf_execute(plan); + if(config.dim == 1){ + unsigned log_dim = log2(config.num); + float2 *tmp = new float2[total_sz](); + + for(unsigned j = 0; j < config.batch; j++){ + for(unsigned i = 0; i < config.num; i++){ + unsigned index = (j*config.num) + i; + unsigned bit_rev = (j*config.num) + bit_reversed(i, log_dim); + + tmp[index].x = fpgaout[bit_rev].x; + tmp[index].y = fpgaout[bit_rev].y; + } + } + for(unsigned i = 0; i < total_sz; i++ ){ + fpgaout[i].x = tmp[i].x; + fpgaout[i].y = tmp[i].y; + } + + delete tmp; + } + // Verification using SNR float mag_sum = 0, noise_sum = 0, magnitude, noise; for (size_t i = 0; i < total_sz; i++) { diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index ae8298f..2276f21 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -54,7 +54,7 @@ endif() ## Flags for different target options set(AOC_FLAGS "-g -v -no-interleaving=default ${BOARD_PACKAGE}" CACHE STRING "AOC compiler flags") separate_arguments(AOC_FLAGS) -set(EMU_FLAGS "-legacy-emulator -march=emulator" CACHE STRING "AOC emulation flags") +set(EMU_FLAGS "-march=emulator" CACHE STRING "AOC emulation flags") separate_arguments(EMU_FLAGS) set(REP_FLAGS "-report -rtl" CACHE STRING "AOC report flags") separate_arguments(REP_FLAGS) diff --git a/kernels/fft1d/CMakeLists.txt b/kernels/fft1d/CMakeLists.txt index bea021e..8fdeef1 100644 --- a/kernels/fft1d/CMakeLists.txt +++ b/kernels/fft1d/CMakeLists.txt @@ -11,7 +11,7 @@ cmake_minimum_required(VERSION 3.10) set(CL_PATH 
"${fftkernelsfpga_SOURCE_DIR}/fft1d") set(kernels fft1d) -include(${fftkernelsfpga_SOURCE_DIR}/cmake/genKernelTargets.cmake) +include(${fft_SOURCE_DIR}/cmake/genKernelTargets.cmake) if (INTELFPGAOPENCL_FOUND) gen_fft_targets(${kernels}) diff --git a/kernels/fft2d/CMakeLists.txt b/kernels/fft2d/CMakeLists.txt index 2f893d5..00c8179 100644 --- a/kernels/fft2d/CMakeLists.txt +++ b/kernels/fft2d/CMakeLists.txt @@ -9,9 +9,9 @@ cmake_minimum_required(VERSION 3.10) # - ${kernel_name}_syn: to generate synthesis binary ## set(CL_PATH "${fftkernelsfpga_SOURCE_DIR}/fft2d") -set(kernels fft2d_bram fft2d_ddr fft2d_bram_opt) +set(kernels fft2d_bram fft2d_ddr) -include(${fftkernelsfpga_SOURCE_DIR}/cmake/genKernelTargets.cmake) +include(${fft_SOURCE_DIR}/cmake/genKernelTargets.cmake) if (INTELFPGAOPENCL_FOUND) gen_fft_targets(${kernels}) diff --git a/kernels/fft2d/fft2d_bram.cl b/kernels/fft2d/fft2d_bram.cl index 72f3b6c..d35d88e 100644 --- a/kernels/fft2d/fft2d_bram.cl +++ b/kernels/fft2d/fft2d_bram.cl @@ -1,57 +1,64 @@ // Author: Arjun Ramaswami -#include "fft_8.cl" - -// Source the log(size) (log(1k) = 10) from a header shared with the host code #include "fft_config.h" +#include "fft_8.cl" +#include "../matrixTranspose/diagonal_bitrev.cl" #pragma OPENCL EXTENSION cl_intel_channels : enable -channel float2 chaninfft1[8] __attribute__((depth(8))); -channel float2 chanoutfft1[8] __attribute__((depth(8))); - -channel float2 chaninfft2[8] __attribute__((depth(8))); -channel float2 chanoutfft2[8] __attribute__((depth(8))); - -int bit_reversed(int x, int bits) { - int y = 0; - #pragma unroll - for (int i = 0; i < bits; i++) { - y <<= 1; - y |= x & 1; - x >>= 1; - } - return y; -} +channel float2 chaninfft2da[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninfft2db[POINTS] __attribute__((depth(POINTS))); + +channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); +channel float2 chaninTransStore[POINTS] __attribute__((depth(POINTS))); -// Kernel that fetches 
data from global memory -kernel void fetch(global volatile float2 * restrict src) { +kernel void fetchBitrev(global volatile float2 * restrict src, int how_many) { + unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bitrevA = false; - for(unsigned k = 0; k < N; k++){ - float2 buf[N]; + float2 __attribute__((memory, numbanks(8))) buf[2][N]; + + // additional iterations to fill the buffers + for(unsigned step = 0; step < (how_many * DEPTH) + delay; step++){ - #pragma unroll 8 - for(unsigned i = 0; i < N; i++){ - buf[i & ((1<= delay) { + write_channel_intel(chaninfft2da[0], data.i0); + write_channel_intel(chaninfft2da[1], data.i1); + write_channel_intel(chaninfft2da[2], data.i2); + write_channel_intel(chaninfft2da[3], data.i3); + write_channel_intel(chaninfft2da[4], data.i4); + write_channel_intel(chaninfft2da[5], data.i5); + write_channel_intel(chaninfft2da[6], data.i6); + write_channel_intel(chaninfft2da[7], data.i7); } } } -/* This single work-item task wraps the FFT engine - * 'inverse' toggles between the direct and the inverse transform - */ - -kernel void fft2da(int inverse) { +kernel void fft2da(int inverse, int how_many) { /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element @@ -62,79 +69,115 @@ kernel void fft2da(int inverse) { float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; // needs to run "N / 8 - 1" additional iterations to drain the last outputs - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft1[0]); - data.i1 = read_channel_intel(chaninfft1[1]); - data.i2 = read_channel_intel(chaninfft1[2]); - data.i3 = read_channel_intel(chaninfft1[3]); - data.i4 = read_channel_intel(chaninfft1[4]); - data.i5 = read_channel_intel(chaninfft1[5]); - data.i6 = read_channel_intel(chaninfft1[6]); - data.i7 = 
read_channel_intel(chaninfft1[7]); - } - else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } + #pragma loop_coalesce + for(unsigned j = 0; j < how_many; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + // Read data from channels + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft2da[0]); + data.i1 = read_channel_intel(chaninfft2da[1]); + data.i2 = read_channel_intel(chaninfft2da[2]); + data.i3 = read_channel_intel(chaninfft2da[3]); + data.i4 = read_channel_intel(chaninfft2da[4]); + data.i5 = read_channel_intel(chaninfft2da[5]); + data.i6 = read_channel_intel(chaninfft2da[6]); + data.i7 = read_channel_intel(chaninfft2da[7]); + } + else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chanoutfft1[0], data.i0); - write_channel_intel(chanoutfft1[1], data.i1); - write_channel_intel(chanoutfft1[2], data.i2); - write_channel_intel(chanoutfft1[3], data.i3); - write_channel_intel(chanoutfft1[4], data.i4); - write_channel_intel(chanoutfft1[5], data.i5); - write_channel_intel(chanoutfft1[6], data.i6); - write_channel_intel(chanoutfft1[7], data.i7); + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTranspose[0], data.i0); + write_channel_intel(chaninTranspose[1], data.i1); + write_channel_intel(chaninTranspose[2], data.i2); + write_channel_intel(chaninTranspose[3], data.i3); + write_channel_intel(chaninTranspose[4], data.i4); + write_channel_intel(chaninTranspose[5], data.i5); + write_channel_intel(chaninTranspose[6], data.i6); + write_channel_intel(chaninTranspose[7], data.i7); + } } } } -// 
Transposes fetched data; stores them to global memory -kernel void transpose(){ - unsigned revcolt, where, where_write; - - local float2 buf[N * N]; - - // Perform N*N transpositions and transfers - for(unsigned i = 0; i < N; i++){ - for(unsigned k = 0; k < (N / 8); k++){ - where = ((i << LOGN) + (k << LOGPOINTS)); - - #pragma unroll 8 - for( unsigned u = 0; u < 8; u++){ - buf[where + u] = read_channel_intel(chanoutfft1[u]); - } +kernel void transpose(int how_many) { + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = false; + + float2 buf[2][DEPTH][POINTS]; + float2 bitrev_in[2][N]; + float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; + + int initial_delay = DELAY + DELAY; // for each of the bitrev buffer + + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((how_many * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((how_many * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose[0]); + data.i1 = read_channel_intel(chaninTranspose[1]); + data.i2 = read_channel_intel(chaninTranspose[2]); + data.i3 = read_channel_intel(chaninTranspose[3]); + data.i4 = read_channel_intel(chaninTranspose[4]); + data.i5 = read_channel_intel(chaninTranspose[5]); + data.i6 = read_channel_intel(chaninTranspose[6]); + data.i7 = read_channel_intel(chaninTranspose[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; } - } - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); - - for(unsigned k = 0; k < (N / 8); k++){ - where_write = ((k * N) + revcolt); - - write_channel_intel(chaninfft2[0], buf[where_write]); // 0 - write_channel_intel(chaninfft2[1], buf[where_write + 4 * (N / 8) * N]); // 32 - write_channel_intel(chaninfft2[2], buf[where_write + 2 * (N / 8) * N]); // 16 - write_channel_intel(chaninfft2[3], buf[where_write + 6 * (N / 8) * N]); // 48 - write_channel_intel(chaninfft2[4], 
buf[where_write + (N / 8) * N]); // 8 - write_channel_intel(chaninfft2[5], buf[where_write + 5 * (N / 8) * N]); // 40 - write_channel_intel(chaninfft2[6], buf[where_write + 3 * (N / 8) * N]); // 24 - write_channel_intel(chaninfft2[7], buf[where_write + 7 * (N / 8) * N]); // 54 + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, DELAY); + + data_out = readBuf( + is_bufA ? buf[1] : buf[0], + step); + + unsigned start_row = (step + DELAY) & (DEPTH -1); + data_out = bitreverse_out( + is_bitrevA ? bitrev_out[0] : bitrev_out[1], + is_bitrevA ? 
bitrev_out[1] : bitrev_out[0], + data_out, start_row); + + if (step >= (DEPTH)) { + write_channel_intel(chaninfft2db[0], data_out.i0); + write_channel_intel(chaninfft2db[1], data_out.i1); + write_channel_intel(chaninfft2db[2], data_out.i2); + write_channel_intel(chaninfft2db[3], data_out.i3); + write_channel_intel(chaninfft2db[4], data_out.i4); + write_channel_intel(chaninfft2db[5], data_out.i5); + write_channel_intel(chaninfft2db[6], data_out.i6); + write_channel_intel(chaninfft2db[7], data_out.i7); } } } -kernel void fft2db(int inverse) { +kernel void fft2db(int inverse, int how_many) { + /* The FFT engine requires a sliding window for data reordering; data stored * in this array is carried across loop iterations and shifted by 1 element * every iteration; all loop dependencies derived from the uses of this @@ -143,65 +186,102 @@ kernel void fft2db(int inverse) { float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft2[0]); - data.i1 = read_channel_intel(chaninfft2[1]); - data.i2 = read_channel_intel(chaninfft2[2]); - data.i3 = read_channel_intel(chaninfft2[3]); - data.i4 = read_channel_intel(chaninfft2[4]); - data.i5 = read_channel_intel(chaninfft2[5]); - data.i6 = read_channel_intel(chaninfft2[6]); - data.i7 = read_channel_intel(chaninfft2[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } + #pragma loop_coalesce + for(unsigned j = 0; j < 1; j++){ + for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { + float2x8 data; + + // Read data from channels + if (i < N * (N / POINTS)) { + data.i0 = read_channel_intel(chaninfft2db[0]); + data.i1 = read_channel_intel(chaninfft2db[1]); + data.i2 = read_channel_intel(chaninfft2db[2]); + data.i3 = read_channel_intel(chaninfft2db[3]); + data.i4 = 
read_channel_intel(chaninfft2db[4]); + data.i5 = read_channel_intel(chaninfft2db[5]); + data.i6 = read_channel_intel(chaninfft2db[6]); + data.i7 = read_channel_intel(chaninfft2db[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chanoutfft2[0], data.i0); - write_channel_intel(chanoutfft2[1], data.i1); - write_channel_intel(chanoutfft2[2], data.i2); - write_channel_intel(chanoutfft2[3], data.i3); - write_channel_intel(chanoutfft2[4], data.i4); - write_channel_intel(chanoutfft2[5], data.i5); - write_channel_intel(chanoutfft2[6], data.i6); - write_channel_intel(chanoutfft2[7], data.i7); + // Perform one FFT step + data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); + + // Write result to channels + if (i >= N / POINTS - 1) { + write_channel_intel(chaninTransStore[0], data.i0); + write_channel_intel(chaninTransStore[1], data.i1); + write_channel_intel(chaninTransStore[2], data.i2); + write_channel_intel(chaninTransStore[3], data.i3); + write_channel_intel(chaninTransStore[4], data.i4); + write_channel_intel(chaninTransStore[5], data.i5); + write_channel_intel(chaninTransStore[6], data.i6); + write_channel_intel(chaninTransStore[7], data.i7); + } } } } -kernel void store(global volatile float2 * restrict dest){ - unsigned revcolt, where; - - local float2 buf[N * N]; - - // perform N*N writes to buffer - for(unsigned i = 0; i < N; i++){ - for(unsigned j = 0; j < (N / 8); j++){ - where = ((i << LOGN) + (j << LOGPOINTS)); - - #pragma unroll 8 - for(unsigned u = 0; u < 8; u++){ - buf[where + u] = read_channel_intel(chanoutfft2[u]); - } +kernel void transposeStore(global volatile float2 * restrict dest, int how_many) { + + const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + bool is_bufA = false, is_bitrevA = 
false; + + float2 buf[2][DEPTH][POINTS]; + float2 bitrev_in[2][N]; + + int initial_delay = DELAY; // for each of the bitrev buffer + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((how_many * DEPTH) + DEPTH); step++){ + + float2x8 data, data_out; + if (step < ((how_many * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTransStore[0]); + data.i1 = read_channel_intel(chaninTransStore[1]); + data.i2 = read_channel_intel(chaninTransStore[2]); + data.i3 = read_channel_intel(chaninTransStore[3]); + data.i4 = read_channel_intel(chaninTransStore[4]); + data.i5 = read_channel_intel(chaninTransStore[5]); + data.i6 = read_channel_intel(chaninTransStore[6]); + data.i7 = read_channel_intel(chaninTransStore[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; } - } - - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); - where = (i << LOGN); - - #pragma unroll 8 - for( unsigned u = 0; u < N; u++){ - dest[where + u] = buf[(u << LOGN) + revcolt]; + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf[0] : buf[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? 
buf[1] : buf[0], + step); + + if (step >= (DEPTH)) { + unsigned index = (step - DEPTH) * 8; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; } } -} +} \ No newline at end of file diff --git a/kernels/fft2d/fft2d_bram_opt.cl b/kernels/fft2d/fft2d_bram_opt.cl deleted file mode 100644 index d35d88e..0000000 --- a/kernels/fft2d/fft2d_bram_opt.cl +++ /dev/null @@ -1,287 +0,0 @@ -// Author: Arjun Ramaswami - -#include "fft_config.h" -#include "fft_8.cl" -#include "../matrixTranspose/diagonal_bitrev.cl" - -#pragma OPENCL EXTENSION cl_intel_channels : enable -channel float2 chaninfft2da[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninfft2db[POINTS] __attribute__((depth(POINTS))); - -channel float2 chaninTranspose[POINTS] __attribute__((depth(POINTS))); -channel float2 chaninTransStore[POINTS] __attribute__((depth(POINTS))); - -kernel void fetchBitrev(global volatile float2 * restrict src, int how_many) { - unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 - bool is_bitrevA = false; - - float2 __attribute__((memory, numbanks(8))) buf[2][N]; - - // additional iterations to fill the buffers - for(unsigned step = 0; step < (how_many * DEPTH) + delay; step++){ - - unsigned where = (step & ((N * DEPTH) - 1)) * 8; - - float2x8 data; - if (step < (how_many * DEPTH)) { - data.i0 = src[where + 0]; - data.i1 = src[where + 1]; - data.i2 = src[where + 2]; - data.i3 = src[where + 3]; - data.i4 = src[where + 4]; - data.i5 = src[where + 5]; - data.i6 = src[where + 6]; - data.i7 = src[where + 7]; - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_fetch(data, - is_bitrevA ? 
buf[0] : buf[1], - is_bitrevA ? buf[1] : buf[0], - row); - - if (step >= delay) { - write_channel_intel(chaninfft2da[0], data.i0); - write_channel_intel(chaninfft2da[1], data.i1); - write_channel_intel(chaninfft2da[2], data.i2); - write_channel_intel(chaninfft2da[3], data.i3); - write_channel_intel(chaninfft2da[4], data.i4); - write_channel_intel(chaninfft2da[5], data.i5); - write_channel_intel(chaninfft2da[6], data.i6); - write_channel_intel(chaninfft2da[7], data.i7); - } - } -} - -kernel void fft2da(int inverse, int how_many) { - - /* The FFT engine requires a sliding window for data reordering; data stored - * in this array is carried across loop iterations and shifted by 1 element - * every iteration; all loop dependencies derived from the uses of this - * array are simple transfers between adjacent array elements - */ - - float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - - // needs to run "N / 8 - 1" additional iterations to drain the last outputs - #pragma loop_coalesce - for(unsigned j = 0; j < how_many; j++){ - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft2da[0]); - data.i1 = read_channel_intel(chaninfft2da[1]); - data.i2 = read_channel_intel(chaninfft2da[2]); - data.i3 = read_channel_intel(chaninfft2da[3]); - data.i4 = read_channel_intel(chaninfft2da[4]); - data.i5 = read_channel_intel(chaninfft2da[5]); - data.i6 = read_channel_intel(chaninfft2da[6]); - data.i7 = read_channel_intel(chaninfft2da[7]); - } - else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chaninTranspose[0], data.i0); - write_channel_intel(chaninTranspose[1], data.i1); - write_channel_intel(chaninTranspose[2], 
data.i2); - write_channel_intel(chaninTranspose[3], data.i3); - write_channel_intel(chaninTranspose[4], data.i4); - write_channel_intel(chaninTranspose[5], data.i5); - write_channel_intel(chaninTranspose[6], data.i6); - write_channel_intel(chaninTranspose[7], data.i7); - } - } - } -} - -kernel void transpose(int how_many) { - const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 - bool is_bufA = false, is_bitrevA = false; - - float2 buf[2][DEPTH][POINTS]; - float2 bitrev_in[2][N]; - float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; - - int initial_delay = DELAY + DELAY; // for each of the bitrev buffer - - // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((how_many * DEPTH) + DEPTH); step++){ - - float2x8 data, data_out; - if (step < ((how_many * DEPTH) - initial_delay)) { - data.i0 = read_channel_intel(chaninTranspose[0]); - data.i1 = read_channel_intel(chaninTranspose[1]); - data.i2 = read_channel_intel(chaninTranspose[2]); - data.i3 = read_channel_intel(chaninTranspose[3]); - data.i4 = read_channel_intel(chaninTranspose[4]); - data.i5 = read_channel_intel(chaninTranspose[5]); - data.i6 = read_channel_intel(chaninTranspose[6]); - data.i7 = read_channel_intel(chaninTranspose[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Swap buffers every N*N/8 iterations - // starting from the additional delay of N/8 iterations - is_bufA = (( (step + DELAY) & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_in(data, - is_bitrevA ? bitrev_in[0] : bitrev_in[1], - is_bitrevA ? bitrev_in[1] : bitrev_in[0], - row); - - writeBuf(data, - is_bufA ? buf[0] : buf[1], - step, DELAY); - - data_out = readBuf( - is_bufA ? 
buf[1] : buf[0], - step); - - unsigned start_row = (step + DELAY) & (DEPTH -1); - data_out = bitreverse_out( - is_bitrevA ? bitrev_out[0] : bitrev_out[1], - is_bitrevA ? bitrev_out[1] : bitrev_out[0], - data_out, start_row); - - if (step >= (DEPTH)) { - write_channel_intel(chaninfft2db[0], data_out.i0); - write_channel_intel(chaninfft2db[1], data_out.i1); - write_channel_intel(chaninfft2db[2], data_out.i2); - write_channel_intel(chaninfft2db[3], data_out.i3); - write_channel_intel(chaninfft2db[4], data_out.i4); - write_channel_intel(chaninfft2db[5], data_out.i5); - write_channel_intel(chaninfft2db[6], data_out.i6); - write_channel_intel(chaninfft2db[7], data_out.i7); - } - } -} - -kernel void fft2db(int inverse, int how_many) { - - /* The FFT engine requires a sliding window for data reordering; data stored - * in this array is carried across loop iterations and shifted by 1 element - * every iteration; all loop dependencies derived from the uses of this - * array are simple transfers between adjacent array elements - */ - - float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - - #pragma loop_coalesce - for(unsigned j = 0; j < 1; j++){ - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft2db[0]); - data.i1 = read_channel_intel(chaninfft2db[1]); - data.i2 = read_channel_intel(chaninfft2db[2]); - data.i3 = read_channel_intel(chaninfft2db[3]); - data.i4 = read_channel_intel(chaninfft2db[4]); - data.i5 = read_channel_intel(chaninfft2db[5]); - data.i6 = read_channel_intel(chaninfft2db[6]); - data.i7 = read_channel_intel(chaninfft2db[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - 
write_channel_intel(chaninTransStore[0], data.i0); - write_channel_intel(chaninTransStore[1], data.i1); - write_channel_intel(chaninTransStore[2], data.i2); - write_channel_intel(chaninTransStore[3], data.i3); - write_channel_intel(chaninTransStore[4], data.i4); - write_channel_intel(chaninTransStore[5], data.i5); - write_channel_intel(chaninTransStore[6], data.i6); - write_channel_intel(chaninTransStore[7], data.i7); - } - } - } -} - -kernel void transposeStore(global volatile float2 * restrict dest, int how_many) { - - const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 - bool is_bufA = false, is_bitrevA = false; - - float2 buf[2][DEPTH][POINTS]; - float2 bitrev_in[2][N]; - - int initial_delay = DELAY; // for each of the bitrev buffer - // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((how_many * DEPTH) + DEPTH); step++){ - - float2x8 data, data_out; - if (step < ((how_many * DEPTH) - initial_delay)) { - data.i0 = read_channel_intel(chaninTransStore[0]); - data.i1 = read_channel_intel(chaninTransStore[1]); - data.i2 = read_channel_intel(chaninTransStore[2]); - data.i3 = read_channel_intel(chaninTransStore[3]); - data.i4 = read_channel_intel(chaninTransStore[4]); - data.i5 = read_channel_intel(chaninTransStore[5]); - data.i6 = read_channel_intel(chaninTransStore[6]); - data.i7 = read_channel_intel(chaninTransStore[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - // Swap buffers every N*N/8 iterations - // starting from the additional delay of N/8 iterations - is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_in(data, - is_bitrevA ? bitrev_in[0] : bitrev_in[1], - is_bitrevA ? bitrev_in[1] : bitrev_in[0], - row); - - writeBuf(data, - is_bufA ? 
buf[0] : buf[1], - step, 0); - - data_out = readBuf_store( - is_bufA ? buf[1] : buf[0], - step); - - if (step >= (DEPTH)) { - unsigned index = (step - DEPTH) * 8; - - dest[index + 0] = data_out.i0; - dest[index + 1] = data_out.i1; - dest[index + 2] = data_out.i2; - dest[index + 3] = data_out.i3; - dest[index + 4] = data_out.i4; - dest[index + 5] = data_out.i5; - dest[index + 6] = data_out.i6; - dest[index + 7] = data_out.i7; - } - } -} \ No newline at end of file diff --git a/kernels/fft3d/CMakeLists.txt b/kernels/fft3d/CMakeLists.txt index d95e4b1..c48ebc9 100644 --- a/kernels/fft3d/CMakeLists.txt +++ b/kernels/fft3d/CMakeLists.txt @@ -9,10 +9,9 @@ cmake_minimum_required(VERSION 3.10) # - ${kernel_name}_syn: to generate synthesis binary ## set(CL_PATH "${fftkernelsfpga_SOURCE_DIR}/fft3d") -set(kernels fft3d_bram fft3d_bram_triv fft3d_ddr fft3d_ddr_triv - fft3d_ddr_batch) +set(kernels fft3d_bram fft3d_ddr fft3d_ddr_batch) -include(${fftkernelsfpga_SOURCE_DIR}/cmake/genKernelTargets.cmake) +include(${fft_SOURCE_DIR}/cmake/genKernelTargets.cmake) if (INTELFPGAOPENCL_FOUND) gen_fft_targets(${kernels}) diff --git a/kernels/fft3d/fft3d_bram_triv.cl b/kernels/fft3d/fft3d_bram_triv.cl deleted file mode 100755 index e23ee64..0000000 --- a/kernels/fft3d/fft3d_bram_triv.cl +++ /dev/null @@ -1,317 +0,0 @@ -// Author: Arjun Ramaswami - -#include "fft_8.cl" - -// Source the log(size) (log(1k) = 10) from a header shared with the host code -#include "fft_config.h" - -#pragma OPENCL EXTENSION cl_intel_channels : enable -channel float2 chaninfft[8] __attribute__((depth(8))); -channel float2 chanoutfft[8] __attribute__((depth(8))); -channel float2 chaninfft2[8] __attribute__((depth(8))); -channel float2 chanoutfft2[8] __attribute__((depth(8))); -channel float2 chaninfetch[8] __attribute__((depth(8))); - - -int bit_reversed(int x, int bits) { - int y = 0; - #pragma unroll - for (int i = 0; i < bits; i++) { - y <<= 1; - y |= x & 1; - x >>= 1; - } - return y; -} - -void 
sendTofft(float2 *buffer, unsigned j){ - write_channel_intel(chaninfft[0], buffer[j]); // 0 - write_channel_intel(chaninfft[1], buffer[4 * N / 8 + j]); // 32 - write_channel_intel(chaninfft[2], buffer[2 * N / 8 + j]); // 16 - write_channel_intel(chaninfft[3], buffer[6 * N / 8 + j]); // 48 - write_channel_intel(chaninfft[4], buffer[N / 8 + j]); // 8 - write_channel_intel(chaninfft[5], buffer[5 * N / 8 + j]); // 40 - write_channel_intel(chaninfft[6], buffer[3 * N / 8 + j]); // 24 - write_channel_intel(chaninfft[7], buffer[7 * N / 8 + j]); // 54 -} - -// Kernel that fetches data from global memory -kernel void fetch(global volatile float2 * restrict src) { - - for(unsigned k = 0; k < (1 << (LOGN + LOGN)); k++){ - - float2 buf[N]; - #pragma unroll 8 - for(unsigned i = 0; i < N; i++){ - buf[i & ((1<= N / POINTS - 1) { - write_channel_intel(chanoutfft[0], data.i0); - write_channel_intel(chanoutfft[1], data.i1); - write_channel_intel(chanoutfft[2], data.i2); - write_channel_intel(chanoutfft[3], data.i3); - write_channel_intel(chanoutfft[4], data.i4); - write_channel_intel(chanoutfft[5], data.i5); - write_channel_intel(chanoutfft[6], data.i6); - write_channel_intel(chanoutfft[7], data.i7); - } - } - } -} - -// Transposes fetched data; stores them to global memory -kernel void transpose(global float2 * restrict dest) { - - unsigned revcolt, where_read, where_write, where; - - local float2 buf[N * N]; - - // Perform N times N*N transpositions and transfers - for(unsigned p = 0; p < N; p++){ - - for(unsigned i = 0; i < N; i++){ - for(unsigned k = 0; k < (N / 8); k++){ - where_read = ((i << LOGN) + (k << LOGPOINTS)); - - buf[where_read + 0] = read_channel_intel(chanoutfft[0]); - buf[where_read + 1] = read_channel_intel(chanoutfft[1]); - buf[where_read + 2] = read_channel_intel(chanoutfft[2]); - buf[where_read + 3] = read_channel_intel(chanoutfft[3]); - buf[where_read + 4] = read_channel_intel(chanoutfft[4]); - buf[where_read + 5] = read_channel_intel(chanoutfft[5]); - 
buf[where_read + 6] = read_channel_intel(chanoutfft[6]); - buf[where_read + 7] = read_channel_intel(chanoutfft[7]); - } - } - - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); - - for(unsigned k = 0; k < (N / 8); k++){ - where_write = ((k * N) + revcolt); - - write_channel_intel(chaninfft2[0], buf[where_write]); // 0 - write_channel_intel(chaninfft2[1], buf[where_write + 4 * (N / 8) * N]); // 32 - write_channel_intel(chaninfft2[2], buf[where_write + 2 * (N / 8) * N]); // 16 - write_channel_intel(chaninfft2[3], buf[where_write + 6 * (N / 8) * N]); // 48 - write_channel_intel(chaninfft2[4], buf[where_write + (N / 8) * N]); // 8 - write_channel_intel(chaninfft2[5], buf[where_write + 5 * (N / 8) * N]); // 40 - write_channel_intel(chaninfft2[6], buf[where_write + 3 * (N / 8) * N]); // 24 - write_channel_intel(chaninfft2[7], buf[where_write + 7 * (N / 8) * N]); // 54 - } - } - } - - for(unsigned p = 0; p < N; p++){ - - for(unsigned i = 0; i < N; i++){ - for(unsigned j = 0; j < (N / 8); j++){ - where = ((i << LOGN) + (j << LOGPOINTS)); - - buf[where + 0] = read_channel_intel(chanoutfft[0]); - buf[where + 1] = read_channel_intel(chanoutfft[1]); - buf[where + 2] = read_channel_intel(chanoutfft[2]); - buf[where + 3] = read_channel_intel(chanoutfft[3]); - buf[where + 4] = read_channel_intel(chanoutfft[4]); - buf[where + 5] = read_channel_intel(chanoutfft[5]); - buf[where + 6] = read_channel_intel(chanoutfft[6]); - buf[where + 7] = read_channel_intel(chanoutfft[7]); - } - } - - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); - where = ( (i << (LOGN + LOGN)) + (p << LOGN)); - - #pragma unroll 8 - for( unsigned q = 0; q < N; q++){ - dest[where + q] = buf[(q << LOGN) + revcolt]; - } - } - - } - -} - -kernel void fft3db(int inverse) { - - /* The FFT engine requires a sliding window for data reordering; data stored - * in this array is carried across loop iterations and shifted by 1 element - * every iteration; all loop dependencies derived 
from the uses of this - * array are simple transfers between adjacent array elements - */ - - float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for( int j = 0; j < N; j++){ - - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft2[0]); - data.i1 = read_channel_intel(chaninfft2[1]); - data.i2 = read_channel_intel(chaninfft2[2]); - data.i3 = read_channel_intel(chaninfft2[3]); - data.i4 = read_channel_intel(chaninfft2[4]); - data.i5 = read_channel_intel(chaninfft2[5]); - data.i6 = read_channel_intel(chaninfft2[6]); - data.i7 = read_channel_intel(chaninfft2[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chanoutfft2[0], data.i0); - write_channel_intel(chanoutfft2[1], data.i1); - write_channel_intel(chanoutfft2[2], data.i2); - write_channel_intel(chanoutfft2[3], data.i3); - write_channel_intel(chanoutfft2[4], data.i4); - write_channel_intel(chanoutfft2[5], data.i5); - write_channel_intel(chanoutfft2[6], data.i6); - write_channel_intel(chanoutfft2[7], data.i7); - } - } - - } -} - -// Stores data for 3rd dim FFT -kernel void transpose3D(){ - unsigned revcolt, where; - unsigned where_test; - - local float2 buf_3d[N * N * N]; - local float2 buf[N * N]; - - // perform N*N*N writes to buffer - for(unsigned m = 0; m < N; m++){ - - for(unsigned i = 0; i < N; i++){ - for(unsigned j = 0; j < (N / 8); j++){ - where = ((i << LOGN) + (j << LOGPOINTS)); - - buf[where + 0] = read_channel_intel(chanoutfft2[0]); - buf[where + 1] = read_channel_intel(chanoutfft2[1]); - buf[where + 2] = read_channel_intel(chanoutfft2[2]); - buf[where + 3] = read_channel_intel(chanoutfft2[3]); - buf[where + 4] = 
read_channel_intel(chanoutfft2[4]); - buf[where + 5] = read_channel_intel(chanoutfft2[5]); - buf[where + 6] = read_channel_intel(chanoutfft2[6]); - buf[where + 7] = read_channel_intel(chanoutfft2[7]); - } - } - - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); - where = (i << LOGN) + (m << (LOGN + LOGN)); - - #pragma unroll 8 - for( unsigned u = 0; u < N; u++){ - buf_3d[where + u] = buf[(u << LOGN) + revcolt]; - } - } - - } - - // Flush entire 3d buffer transposed through channels - for(unsigned m = 0; m < N; m++){ - - for(unsigned i = 0; i < N; i++){ - where = ((i << (LOGN + LOGN)) + ( m << LOGN)); - - #pragma unroll 8 - for(unsigned u = 0; u < N; u++){ - buf[(i << LOGN) + u] = buf_3d[where + u]; - } - } - - for( unsigned i = 0; i < N; i++){ - for( unsigned j = 0; j < (N / 8); j++){ - where = (j * N * 8) + i; - - write_channel_intel(chaninfetch[0], buf[where + (0 << LOGN)]); - write_channel_intel(chaninfetch[1], buf[where + (1 << LOGN)]); - write_channel_intel(chaninfetch[2], buf[where + (2 << LOGN)]); - write_channel_intel(chaninfetch[3], buf[where + (3 << LOGN)]); - write_channel_intel(chaninfetch[4], buf[where + (4 << LOGN)]); - write_channel_intel(chaninfetch[5], buf[where + (5 << LOGN)]); - write_channel_intel(chaninfetch[6], buf[where + (6 << LOGN)]); - write_channel_intel(chaninfetch[7], buf[where + (7 << LOGN)]); - } - } - - } - -} \ No newline at end of file diff --git a/kernels/fft3d/fft3d_ddr_triv.cl b/kernels/fft3d/fft3d_ddr_triv.cl deleted file mode 100755 index 6355285..0000000 --- a/kernels/fft3d/fft3d_ddr_triv.cl +++ /dev/null @@ -1,389 +0,0 @@ -// Author: Arjun Ramaswami - -#include "fft_8.cl" - -// Source the log(size) (log(1k) = 10) from a header shared with the host code -#include "fft_config.h" - -#pragma OPENCL EXTENSION cl_intel_channels : enable -channel float2 chaninfft1[8] __attribute__((depth(8))); -channel float2 chanoutfft1[8] __attribute__((depth(8))); - -channel float2 chaninfft2[8] __attribute__((depth(8))); 
-channel float2 chanoutfft2[8] __attribute__((depth(8))); - -channel float2 chaninfft3[8] __attribute__((depth(8))); -channel float2 chanoutfft3[8] __attribute__((depth(8))); - -int bit_reversed(int x, int bits) { - int y = 0; - #pragma unroll - for (int i = 0; i < bits; i++) { - y <<= 1; - y |= x & 1; - x >>= 1; - } - return y; -} - -// Kernel that fetches data from global memory -kernel void fetch1(global volatile float2 * restrict src1) { - - for(unsigned k = 0; k < (N * N); k++){ - float2 buf[N]; - - #pragma unroll 8 - for(unsigned i = 0; i < N; i++){ - buf[i & ((1<= N / POINTS - 1) { - write_channel_intel(chanoutfft1[0], data.i0); - write_channel_intel(chanoutfft1[1], data.i1); - write_channel_intel(chanoutfft1[2], data.i2); - write_channel_intel(chanoutfft1[3], data.i3); - write_channel_intel(chanoutfft1[4], data.i4); - write_channel_intel(chanoutfft1[5], data.i5); - write_channel_intel(chanoutfft1[6], data.i6); - write_channel_intel(chanoutfft1[7], data.i7); - } - } - } -} - -// Transposes fetched data; stores them to global memory -kernel void transpose(){ - - unsigned revcolt, where, where_write; - - local float2 buf[N * N]; - - // Perform N times N*N transpositions and transfers - for(unsigned p = 0; p < N; p++){ - - for(unsigned i = 0; i < N; i++){ - for(unsigned k = 0; k < (N / 8); k++){ - where = ((i << LOGN) + (k << LOGPOINTS)); - - buf[where + 0] = read_channel_intel(chanoutfft1[0]); - buf[where + 1] = read_channel_intel(chanoutfft1[1]); - buf[where + 2] = read_channel_intel(chanoutfft1[2]); - buf[where + 3] = read_channel_intel(chanoutfft1[3]); - buf[where + 4] = read_channel_intel(chanoutfft1[4]); - buf[where + 5] = read_channel_intel(chanoutfft1[5]); - buf[where + 6] = read_channel_intel(chanoutfft1[6]); - buf[where + 7] = read_channel_intel(chanoutfft1[7]); - } - } - - for(unsigned i = 0; i < N; i++){ - revcolt = bit_reversed(i, LOGN); - - for(unsigned k = 0; k < (N / 8); k++){ - where_write = ((k * N) + revcolt); - - 
write_channel_intel(chaninfft2[0], buf[where_write]); // 0 - write_channel_intel(chaninfft2[1], buf[where_write + 4 * (N / 8) * N]); // 32 - write_channel_intel(chaninfft2[2], buf[where_write + 2 * (N / 8) * N]); // 16 - write_channel_intel(chaninfft2[3], buf[where_write + 6 * (N / 8) * N]); // 48 - write_channel_intel(chaninfft2[4], buf[where_write + (N / 8) * N]); // 8 - write_channel_intel(chaninfft2[5], buf[where_write + 5 * (N / 8) * N]); // 40 - write_channel_intel(chaninfft2[6], buf[where_write + 3 * (N / 8) * N]); // 24 - write_channel_intel(chaninfft2[7], buf[where_write + 7 * (N / 8) * N]); // 54 - } - } - } -} - -kernel void fft3db(int inverse) { - - /* The FFT engine requires a sliding window for data reordering; data stored - * in this array is carried across loop iterations and shifted by 1 element - * every iteration; all loop dependencies derived from the uses of this - * array are simple transfers between adjacent array elements - */ - - float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for( int j = 0; j < N; j++){ - - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft2[0]); - data.i1 = read_channel_intel(chaninfft2[1]); - data.i2 = read_channel_intel(chaninfft2[2]); - data.i3 = read_channel_intel(chaninfft2[3]); - data.i4 = read_channel_intel(chaninfft2[4]); - data.i5 = read_channel_intel(chaninfft2[5]); - data.i6 = read_channel_intel(chaninfft2[6]); - data.i7 = read_channel_intel(chaninfft2[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chanoutfft2[0], data.i0); - write_channel_intel(chanoutfft2[1], data.i1); - write_channel_intel(chanoutfft2[2], data.i2); - 
write_channel_intel(chanoutfft2[3], data.i3); - write_channel_intel(chanoutfft2[4], data.i4); - write_channel_intel(chanoutfft2[5], data.i5); - write_channel_intel(chanoutfft2[6], data.i6); - write_channel_intel(chanoutfft2[7], data.i7); - } - } - } -} - -/* - * Input through channels in bit reversed format - */ -__kernel -void store1(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict dest1){ - - local float2 buf[N * N]; - - for(unsigned zdim = 0; zdim < N; zdim++){ - - // Store yx plane in buffer, ydim in bit reversed format - for(unsigned xdim = 0; xdim < N; xdim++){ - for(unsigned ydim = 0; ydim < (N / 8); ydim++){ - unsigned where = ((xdim * N) + (ydim * POINTS)); - - buf[where + 0] = read_channel_intel(chanoutfft2[0]); - buf[where + 1] = read_channel_intel(chanoutfft2[1]); - buf[where + 2] = read_channel_intel(chanoutfft2[2]); - buf[where + 3] = read_channel_intel(chanoutfft2[3]); - buf[where + 4] = read_channel_intel(chanoutfft2[4]); - buf[where + 5] = read_channel_intel(chanoutfft2[5]); - buf[where + 6] = read_channel_intel(chanoutfft2[6]); - buf[where + 7] = read_channel_intel(chanoutfft2[7]); - } - } // stored yx plane in buffer - - for(unsigned ydim = 0; ydim < N; ydim++){ - // bit reverse rows / ydim to get back normal order - unsigned revcolt = bit_reversed(ydim, LOGN); - - unsigned ddr_loc = (zdim * N * N) + (ydim * N); - - #pragma unroll 8 - for( unsigned xdim = 0; xdim < N; xdim++){ - dest1[ddr_loc + xdim] = buf[(xdim * N) + revcolt]; - } - } - } // stored N*N*N points in DDR -} - -// Kernel that fetches data from global memory -__kernel -void fetch2(__global __attribute__((buffer_location(BUFFER_LOCATION))) volatile float2 * restrict src2){ - - local float2 buf[N * N]; - - for(unsigned ydim = 0; ydim < N; ydim++){ - /* - * Store xz plane in the buffer - */ - for(unsigned i = 0; i < N; i++){ - unsigned ddr_loc = ( (i * N * N) + (ydim * N) ); - - #pragma unroll 8 - for(unsigned xdim = 0; xdim < N; xdim++){ - buf[(i * 
N) + xdim] = src2[ddr_loc + xdim]; - } - } - - /* Transpose xz plane i.e. zx - * Transfer bit reverse input to FFT - */ - for(unsigned i = 0; i < N; i++){ - - for(unsigned k = 0; k < (N / 8); k++){ - unsigned where = i + (k * N); - - write_channel_intel(chaninfft3[0], buf[where]); // 0 - write_channel_intel(chaninfft3[1], buf[where + 4 * (N / 8) * N]); // 32 - write_channel_intel(chaninfft3[2], buf[where + 2 * (N / 8) * N]); // 16 - write_channel_intel(chaninfft3[3], buf[where + 6 * (N / 8) * N]); // 48 - write_channel_intel(chaninfft3[4], buf[where + (N / 8) * N]); // 8 - write_channel_intel(chaninfft3[5], buf[where + 5 * (N / 8) * N]); // 40 - write_channel_intel(chaninfft3[6], buf[where + 3 * (N / 8) * N]); // 24 - write_channel_intel(chaninfft3[7], buf[where + 7 * (N / 8) * N]); // 54 - } - } - } // y axis -} - -/* - * Input and output data in bit-reversed format - */ -kernel void fft3dc(int inverse) { - - /* The FFT engine requires a sliding window for data reordering; data stored - * in this array is carried across loop iterations and shifted by 1 element - * every iteration; all loop dependencies derived from the uses of this - * array are simple transfers between adjacent array elements - */ - - float2 fft_delay_elements[N + POINTS * (LOGN - 2)]; - for( int j = 0; j < N; j++){ - - for (unsigned i = 0; i < N * (N / POINTS) + N / POINTS - 1; i++) { - float2x8 data; - - // Read data from channels - if (i < N * (N / POINTS)) { - data.i0 = read_channel_intel(chaninfft3[0]); - data.i1 = read_channel_intel(chaninfft3[1]); - data.i2 = read_channel_intel(chaninfft3[2]); - data.i3 = read_channel_intel(chaninfft3[3]); - data.i4 = read_channel_intel(chaninfft3[4]); - data.i5 = read_channel_intel(chaninfft3[5]); - data.i6 = read_channel_intel(chaninfft3[6]); - data.i7 = read_channel_intel(chaninfft3[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - // Perform one FFT step - data = fft_step(data, i % (N / 
POINTS), fft_delay_elements, inverse, LOGN); - - // Write result to channels - if (i >= N / POINTS - 1) { - write_channel_intel(chanoutfft3[0], data.i0); - write_channel_intel(chanoutfft3[1], data.i1); - write_channel_intel(chanoutfft3[2], data.i2); - write_channel_intel(chanoutfft3[3], data.i3); - write_channel_intel(chanoutfft3[4], data.i4); - write_channel_intel(chanoutfft3[5], data.i5); - write_channel_intel(chanoutfft3[6], data.i6); - write_channel_intel(chanoutfft3[7], data.i7); - } - } - } -} - -/* - * input through channels: transformed zx planes - * - values in the z axis is in bitreversed format - */ -kernel void store2(global float2 * restrict dest2){ - - local float2 buf[N * N]; - - for(unsigned ydim = 0; ydim < N; ydim++){ - - /* - * Store zx plane in 2d buffer in bit reversed format - * - outer loop iterates rows - * - inner loop stores elements of each row / zdim in bursts of POINTS (8) - */ - for(unsigned xdim = 0; xdim < N; xdim++){ - for(unsigned zdim = 0; zdim < (N / 8); zdim++){ - - // xdim * N iterates through the 2nd dim, here x - unsigned where = ((xdim * N) + (zdim * POINTS)); - - buf[where + 0] = read_channel_intel(chanoutfft3[0]); - buf[where + 1] = read_channel_intel(chanoutfft3[1]); - buf[where + 2] = read_channel_intel(chanoutfft3[2]); - buf[where + 3] = read_channel_intel(chanoutfft3[3]); - buf[where + 4] = read_channel_intel(chanoutfft3[4]); - buf[where + 5] = read_channel_intel(chanoutfft3[5]); - buf[where + 6] = read_channel_intel(chanoutfft3[6]); - buf[where + 7] = read_channel_intel(chanoutfft3[7]); - - } - } // zx plane stored in buffer - - /* - * Transpose and bitreverse the zx plane in 2d buffer to xz, - * then store in global memory - * - outer loop iterates through the rows / zdim - * - inner loop iterates through each column - * - selects elements based from bit reversed indices - */ - for(unsigned zdim = 0; zdim < N; zdim++){ - - // write to ddr in planes of xz - unsigned ddr_loc = ( (ydim * N) + (zdim * N * N) ); - - /* - 
* Read column-wise in buffer as a transpose of zx to xz plane - * store in ddr row-wise (xdim) then zdim - * 1. bit reverse z axis - revcolt(z) - * 2. transpose zx to xz - xdim * N - * : combine both to read the bitreversed column directly - buf_loc - */ - unsigned revcolt = bit_reversed(zdim, LOGN); - - #pragma unroll 8 - for(unsigned xdim = 0; xdim < N; xdim++){ - unsigned buf_loc = revcolt + (xdim * N); - dest2[ddr_loc + xdim] = buf[buf_loc]; - } - } // stored 2d buffer to ddr - - } // stored entire 3d points to ddr -} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bf3c368..674bba2 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -33,7 +33,7 @@ endif() add_dependencies(test_fftfpga fft3d_bram_emu) add_dependencies(test_fftfpga fft3d_ddr_emu) -add_dependencies(test_fftfpga fft2d_bram_opt_emu) +add_dependencies(test_fftfpga fft2d_bram_emu) add_dependencies(test_fftfpga fft2d_ddr_emu) add_dependencies(test_fftfpga fft1d_emu) From 20f78b4192ec37e7fae8e57eb3b39e7625f80543 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 4 Oct 2021 14:59:27 +0200 Subject: [PATCH 46/76] working ddr batch --- api/src/fft3d.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/api/src/fft3d.c b/api/src/fft3d.c index cfdcb34..0b5385f 100644 --- a/api/src/fft3d.c +++ b/api/src/fft3d.c @@ -755,7 +755,8 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv checkError(status, "Failed to launch transpose kernel"); // Check finish of transfer and computations - /* + clWaitForEvents(1, &write_event[0]); + clReleaseEvent(write_event[0]); status = clFinish(queue6); checkError(status, "failed to finish"); status = clFinish(queue5); @@ -768,9 +769,6 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv checkError(status, "failed to finish"); status = clFinish(queue1); checkError(status, "failed to finish"); - */ - clWaitForEvents(1, &write_event[0]); - 
clReleaseEvent(write_event[0]); // Loop over the 3 stages for(size_t i = 0; i < how_many-2; i++){ @@ -1037,8 +1035,8 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv if (d_inData4) clReleaseMemObject(d_inData4); - if (d_outData2) - clReleaseMemObject(d_outData2); + if (d_outData1) + clReleaseMemObject(d_outData1); if (d_outData2) clReleaseMemObject(d_outData2); if (d_outData3) From 8be7c922e25fb9902bd7c5bd76f85f55d6116f78 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Thu, 7 Oct 2021 17:06:46 +0200 Subject: [PATCH 47/76] streamlined synth path --- api/CMakeLists.txt | 1 + api/include/fftfpga/fftfpga.h | 6 +- api/src/fft3d.c | 520 +-------------------------------- api/src/fft3d_svm.c | 521 ++++++++++++++++++++++++++++++++++ api/src/fftfpga.c | 12 +- api/src/opencl_utils.c | 4 +- cmake/genKernelTargets.cmake | 10 +- examples/fft.cpp | 11 +- examples/helper.cpp | 19 +- examples/helper.hpp | 1 + kernels/CMakeLists.txt | 83 +++--- kernels/fft3d/CMakeLists.txt | 2 +- 12 files changed, 616 insertions(+), 574 deletions(-) create mode 100644 api/src/fft3d_svm.c diff --git a/api/CMakeLists.txt b/api/CMakeLists.txt index 13864e0..2f68cd0 100755 --- a/api/CMakeLists.txt +++ b/api/CMakeLists.txt @@ -11,6 +11,7 @@ project(fftfpga VERSION 2.0 add_library(${PROJECT_NAME} STATIC ${PROJECT_SOURCE_DIR}/src/fftfpga.c ${PROJECT_SOURCE_DIR}/src/fft3d.c + ${PROJECT_SOURCE_DIR}/src/fft3d_svm.c ${PROJECT_SOURCE_DIR}/src/fft2d.c ${PROJECT_SOURCE_DIR}/src/fft1d.c ${PROJECT_SOURCE_DIR}/src/svm.c diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 54d2189..33b59d7 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -149,7 +149,7 @@ extern fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool in * @param interleaving : enable burst interleaved global memory buffers * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t 
fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, bool interleaving); +extern fpga_t fftfpgaf_c2c_3d_bram(const unsigned N, const float2 *inp, float2 *out, const bool inv, const bool interleaving); /** * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA @@ -172,7 +172,7 @@ extern fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, b * @param interleaving : toggle interleaved device memory * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, bool interleaving); +extern fpga_t fftfpgaf_c2c_3d_ddr_svm(const unsigned N, const float2 *inp, float2 *out, const bool inv, const bool interleaving); /** * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA and Shared Virtual Memory for Host to Device Communication @@ -183,7 +183,7 @@ extern fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, boo * @param interleaving : toggle interleaved device memory * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool inv, int how_many); +extern fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(const unsigned N, const float2 *inp, float2 *out, const bool inv, const unsigned how_many); #ifdef __cplusplus } diff --git a/api/src/fft3d.c b/api/src/fft3d.c index 0b5385f..e88e5d0 100644 --- a/api/src/fft3d.c +++ b/api/src/fft3d.c @@ -11,18 +11,11 @@ #include "fpga_state.h" #include "fftfpga/fftfpga.h" -#include "svm.h" #include "opencl_utils.h" #include "misc.h" -#include "/opt/intelFPGA_pro/19.2.0/hld/board/custom_platform_toolkit/mmd/aocl_mmd.h" -#define WR_GLOBALMEM 0 -#define RD_GLOBALMEM 1 #define BATCH 2 -#define NON_BATCH_MODE 0 -#define BATCH_MODE 1 - /** * \brief compute an out-of-place single precision complex 3D-FFT using the BRAM of 
the FPGA * \param N : integer pointer addressing the size of FFT3d @@ -32,7 +25,7 @@ * \param interleaving : 1 if using burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_bram(int N, const float2 *inp, float2 *out, bool inv, bool interleaving) { +fpga_t fftfpgaf_c2c_3d_bram(const unsigned N, const float2 *inp, float2 *out, const bool inv, const bool interleaving) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; @@ -231,7 +224,7 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { int inverse_int = (int)inv; // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); + cl_kernel fetch1_kernel = clCreateKernel(program, "fetch1", &status); checkError(status, "Failed to create fetch1 kernel"); cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); checkError(status, "Failed to create fft3da kernel"); @@ -242,7 +235,7 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); checkError(status, "Failed to create store1 kernel"); - cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); + cl_kernel fetch2_kernel = clCreateKernel(program, "fetch2", &status); checkError(status, "Failed to create fetch2 kernel"); cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); checkError(status, "Failed to create fft3dc kernel"); @@ -391,219 +384,6 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { return fft_time; } -/** - * \brief compute an out-of-place single precision complex 3D FFT using the DDR for 3D Transpose where the data access between the host and the FPGA is using Shared Virtual Memory (SVM) - * \param N : integer pointer addressing the size of FFT3d - * \param inp : float2 pointer to input data of size [N * N * N] - * \param out : 
float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param interleaving : 1 if using burst interleaved global memory buffers - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_3d_ddr_svm(int N, const float2 *inp, float2 *out, bool inv, bool interleaving) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, false}; - cl_int status = 0; - unsigned num_pts = N * N * N; - - // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 2 - BATCH - int mode = WR_GLOBALMEM; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ - return fft_time; - } - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Setup kernels - cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); - checkError(status, "Failed to create fetch kernel"); - cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); - checkError(status, "Failed to create fft3da kernel"); - cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose kernel"); - cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); - checkError(status, "Failed to create fft3db kernel"); - cl_kernel transpose3D_kernel= clCreateKernel(program, "transpose3D", &status); - checkError(status, "Failed to create transpose3D kernel"); - - cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); - checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store_kernel = clCreateKernel(program, "store", &status); - checkError(status, "Failed to create store kernel"); - - // Setup Queues to the kernels - queue_setup(); - - // Device memory buffers - cl_mem d_inOutData; - if(!interleaving){ - d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output 
device buffer\n"); - } - else{ - d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - } - - // allocate SVM buffers - float2 *h_inData, *h_outData; - h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); - h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - - size_t num_bytes = num_pts * sizeof(float2); - double svm_copyin_t = getTimeinMilliSec(); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - memcpy(h_inData, inp, num_bytes); - - status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - fft_time.svm_copyin_t += getTimeinMilliSec() - svm_copyin_t; - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - memset(&h_outData[0], 0, num_bytes); - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - - /* - * kernel arguments - */ - // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData); - checkError(status, "Failed to set fetch kernel arg"); - - status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); - - // kernel stores to DDR memory - status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); - checkError(status, "Failed to set transpose3D kernel arg"); - - // 
kernel fetches from DDR memory - status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void*)&d_inOutData); - checkError(status, "Failed to set transpose3D kernel arg"); - - mode = WR_GLOBALMEM; - - status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); - checkError(status, "Failed to set transpose3D kernel arg 2"); - - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - - // kernel stores using SVM based PCIe to host - status = clSetKernelArgSVMPointer(store_kernel, 0, (void*)h_outData); - checkError(status, "Failed to set store kernel arg"); - - cl_event startExec_event, endExec_event; - status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - mode = RD_GLOBALMEM; - - status = clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); - checkError(status, "Failed to set transpose3D kernel arg 2"); - - status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); - checkError(status, "Failed to launch fetch kernel"); - - status = clFinish(queue7); - checkError(status, "failed to finish"); - status = clFinish(queue6); - 
checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue1); - checkError(status, "failed to finish"); - - cl_ulong kernel_start = 0, kernel_end = 0; - - clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); - clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - - fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); - - double svm_copyout_t = 0.0; - svm_copyout_t = getTimeinMilliSec(); - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, - (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map out data"); - - memcpy(out, h_outData, num_bytes); - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); - checkError(status, "Failed to unmap out data"); - fft_time.svm_copyout_t += getTimeinMilliSec() - svm_copyout_t; - - if (h_inData) - clSVMFree(context, h_inData); - if (h_outData) - clSVMFree(context, h_outData); - - queue_cleanup(); - - if (d_inOutData) - clReleaseMemObject(d_inOutData); - - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - - if(ffta_kernel) - clReleaseKernel(ffta_kernel); - if(fftb_kernel) - clReleaseKernel(fftb_kernel); - if(fftc_kernel) - clReleaseKernel(fftc_kernel); - - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - - if(transpose3D_kernel) - clReleaseKernel(transpose3D_kernel); - - if(store_kernel) - clReleaseKernel(store_kernel); - - fft_time.valid = true; - return fft_time; -} - - /** * \brief compute an batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose * \param N : integer pointer addressing the size of 
FFT3d @@ -632,7 +412,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv int inverse_int = (int)inv; // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetchBitrev1", &status); + cl_kernel fetch1_kernel = clCreateKernel(program, "fetch1", &status); checkError(status, "Failed to create fetch1 kernel"); cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); checkError(status, "Failed to create fft3da kernel"); @@ -643,7 +423,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); checkError(status, "Failed to create store1 kernel"); - cl_kernel fetch2_kernel = clCreateKernel(program, "fetchBitrev2", &status); + cl_kernel fetch2_kernel = clCreateKernel(program, "fetch2", &status); checkError(status, "Failed to create fetch2 kernel"); cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); checkError(status, "Failed to create fft3dc kernel"); @@ -1075,292 +855,4 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv fft_time.valid = 1; return fft_time; -} - - -/** - * \brief compute an batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose and for data transfers between host's main memory and FPGA using Shared Virtual Memory - * \param N : integer pointer addressing the size of FFT3d - * \param inp : float2 pointer to input data of size [N * N * N] - * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param how_many : number of batched computations - * \return fpga_t : time taken in milliseconds for data transfers and execution - */ -fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(int N, const float2 *inp, float2 *out, bool inv, int how_many) { - fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, false}; - cl_int status = 0; - // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 
2 - BATCH - int mode_transpose = WR_GLOBALMEM; - - // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 0) || !svm_enabled){ - return fft_time; - } - - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; - - // Setup kernels - cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); - checkError(status, "Failed to create fetch kernel"); - cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); - checkError(status, "Failed to create fft3da kernel"); - cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); - checkError(status, "Failed to create transpose kernel"); - cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); - checkError(status, "Failed to create fft3db kernel"); - cl_kernel transpose3D_kernel = clCreateKernel(program, "transpose3D", &status); - checkError(status, "Failed to create transpose3D kernel"); - - cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); - checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store_kernel = clCreateKernel(program, "store", &status); - checkError(status, "Failed to create store kernel"); - - // Setup Queues to the kernels - queue_setup(); - - // Device memory buffers: double buffers - unsigned num_pts = N * N * N; - - cl_mem d_inOutData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - cl_mem d_inOutData_1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); - checkError(status, "Failed to allocate output device buffer\n"); - - // allocate and initialize SVM buffers - double svm_copyin_t = 0.0; - float2 *h_inData[how_many], *h_outData[how_many]; - for(size_t i = 0; i < how_many; i++){ - - h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * 
num_pts, 0); - h_outData[i] = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); - - size_t num_bytes = num_pts * sizeof(float2); - - svm_copyin_t = getTimeinMilliSec(); - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData[i], sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // copy data into h_inData - memcpy(&h_inData[i][0], &inp[i*num_pts], num_bytes); - - status = clEnqueueSVMUnmap(queue1, (void *)h_inData[i], 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - fft_time.svm_copyin_t += getTimeinMilliSec() - svm_copyin_t; - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); - checkError(status, "Failed to map input data"); - - // set h_outData to 0 - memset(&h_outData[i][0], 0, num_bytes); - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData[i], 0, NULL, NULL); - checkError(status, "Failed to unmap input data"); - } - - /* - * kernel arguments - */ - // write to fetch kernel using SVM based PCIe - status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData[0]); - checkError(status, "Failed to set fetch1 kernel arg"); - - status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set ffta kernel arg"); - // transpose() has no arguments - status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftb kernel arg"); - - // kernel stores to DDR memory - status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData_1); - checkError(status, "Failed to set transpose3D kernel arg"); - - status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void*)&d_inOutData_0); - checkError(status, "Failed to set transpose3D kernel arg"); - - mode_transpose = WR_GLOBALMEM; - status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode_transpose); - 
checkError(status, "Failed to set transpose3D kernel arg"); - - status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); - checkError(status, "Failed to set fftc kernel arg"); - - cl_event startExec_event, endExec_event; - /* - * First batch write phase - */ - fft_time.exec_t = getTimeinMilliSec(); - status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); - checkError(status, "Failed to launch fetch kernel"); - - status = clFinish(queue1); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue2); - checkError(status, "Failed to finish queue2"); - status = clFinish(queue3); - checkError(status, "Failed to finish queue3"); - status = clFinish(queue4); - checkError(status, "Failed to finish queue4"); - status = clFinish(queue5); - checkError(status, "Failed to finish queue5"); - - for(size_t i = 1; i < how_many; i++){ - - status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData[i]); - checkError(status, "Failed to set fetch kernel arg"); - - status = clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), ((i % 2) == 1) ? (void*)&d_inOutData_0 : (void*)&d_inOutData_1); - checkError(status, "Failed to set transpose3D kernel arg 0"); - - status = clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), ((i % 2) == 1) ? 
(void*)&d_inOutData_1 : (void*)&d_inOutData_0); - checkError(status, "Failed to set transpose3D kernel arg 1"); - - mode_transpose = BATCH; - status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode_transpose); - checkError(status, "Failed to set transpose3D kernel arg 2"); - - status = clSetKernelArgSVMPointer(store_kernel, 0, (void *)h_outData[i-1]); - checkError(status, "Failed to set store kernel arg"); - - // Enqueue Tasks - status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose3D kernel"); - - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clEnqueueTask(queue7, store_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch store kernel"); - - status = clFinish(queue1); - checkError(status, "Failed to finish queue1"); - status = clFinish(queue2); - checkError(status, "Failed to finish queue2"); - status = clFinish(queue3); - checkError(status, "Failed to finish queue3"); - status = clFinish(queue4); - checkError(status, "Failed to finish queue4"); - status = clFinish(queue5); - checkError(status, "Failed to finish queue5"); - status = clFinish(queue6); - checkError(status, "Failed to finish queue6"); - status = clFinish(queue7); - checkError(status, "Failed to finish queue7"); - } - - status = clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), ((how_many % 2) == 0) ? 
(void*)&d_inOutData_1 : (void*)&d_inOutData_0); - checkError(status, "Failed to set transpose3D kernel arg 0"); - - status = clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), ((how_many % 2) == 0) ? (void*)&d_inOutData_0 : (void*)&d_inOutData_1); - checkError(status, "Failed to set transpose3D kernel arg 1"); - - mode_transpose = RD_GLOBALMEM; - status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode_transpose); - checkError(status, "Failed to set transpose3D kernel arg 2"); - - status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose3D kernel"); - - status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - - status = clSetKernelArgSVMPointer(store_kernel, 0, (void *)h_outData[how_many - 1]); - checkError(status, "Failed to set store kernel arg"); - status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); - checkError(status, "Failed to launch store kernel"); - - status = clFinish(queue5); - checkError(status, "Failed to finish queue5"); - status = clFinish(queue6); - checkError(status, "Failed to finish queue6"); - status = clFinish(queue7); - checkError(status, "Failed to finish queue7"); - - fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; - - cl_ulong kernel_start = 0, kernel_end = 0; - - clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); - clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); - - fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); - - double svm_copyout_t = 0.0; - for(size_t i = 0; i < how_many; i++){ - - // copy data into h_outData - size_t num_bytes = num_pts * sizeof(float2); - svm_copyout_t = getTimeinMilliSec(); - - status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, - (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); - 
checkError(status, "Failed to map out data"); - - memcpy(&out[i*num_pts], &h_outData[i][0], num_bytes); - - status = clEnqueueSVMUnmap(queue1, (void *)h_outData[i], 0, NULL, NULL); - checkError(status, "Failed to unmap out data"); - fft_time.svm_copyout_t += getTimeinMilliSec() - svm_copyout_t; - } - - for(size_t i = 0; i < how_many; i++){ - clSVMFree(context, h_inData[i]); - clSVMFree(context, h_outData[i]); - } - - queue_cleanup(); - - if (d_inOutData_0) - clReleaseMemObject(d_inOutData_0); - if (d_inOutData_1) - clReleaseMemObject(d_inOutData_1); - - if(fetch_kernel) - clReleaseKernel(fetch_kernel); - - if(ffta_kernel) - clReleaseKernel(ffta_kernel); - if(fftb_kernel) - clReleaseKernel(fftb_kernel); - if(fftc_kernel) - clReleaseKernel(fftc_kernel); - - if(transpose_kernel) - clReleaseKernel(transpose_kernel); - - if(transpose3D_kernel) - clReleaseKernel(transpose3D_kernel); - - if(store_kernel) - clReleaseKernel(store_kernel); - - fft_time.valid = true; - return fft_time; -} +} \ No newline at end of file diff --git a/api/src/fft3d_svm.c b/api/src/fft3d_svm.c new file mode 100644 index 0000000..c4bc9a1 --- /dev/null +++ b/api/src/fft3d_svm.c @@ -0,0 +1,521 @@ +// Author: Arjun Ramaswami + +#include +#include +#include +#include +#include +#define CL_VERSION_2_0 +#include // to disable interleaving & transfer data to specific banks - CL_CHANNEL_1_INTELFPGA +#include "CL/opencl.h" + +#include "fpga_state.h" +#include "fftfpga/fftfpga.h" +#include "opencl_utils.h" +#include "misc.h" +#include "svm.h" +#include "/opt/intelFPGA_pro/19.2.0/hld/board/custom_platform_toolkit/mmd/aocl_mmd.h" + +#define WR_GLOBALMEM 0 +#define RD_GLOBALMEM 1 +#define BATCH 2 + +/** + * \brief compute an out-of-place single precision complex 3D FFT using the DDR for 3D Transpose where the data access between the host and the FPGA is using Shared Virtual Memory (SVM) + * \param N : integer pointer addressing the size of FFT3d + * \param inp : float2 pointer to input data of size [N * N * N] 
+ * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param interleaving : 1 if using burst interleaved global memory buffers + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_3d_ddr_svm(const unsigned N, const float2 *inp, float2 *out, const bool inv, const bool interleaving) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, false}; + cl_int status = 0; + unsigned num_pts = N * N * N; + + // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 2 - BATCH + int mode = WR_GLOBALMEM; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || !(svm_enabled)){ + return fft_time; + } + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Setup kernels + cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); + cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose kernel"); + cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + cl_kernel transpose3D_kernel= clCreateKernel(program, "transpose3D", &status); + checkError(status, "Failed to create transpose3D kernel"); + + cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + cl_kernel store_kernel = clCreateKernel(program, "store", &status); + checkError(status, "Failed to create store kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // Device memory buffers + cl_mem d_inOutData; + if(!interleaving){ + d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + 
checkError(status, "Failed to allocate output device buffer\n"); + } + else{ + d_inOutData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + } + + // allocate SVM buffers + float2 *h_inData, *h_outData; + h_inData = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + + size_t num_bytes = num_pts * sizeof(float2); + double svm_copyin_t = getTimeinMilliSec(); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + memcpy(h_inData, inp, num_bytes); + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + fft_time.svm_copyin_t += getTimeinMilliSec() - svm_copyin_t; + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + memset(&h_outData[0], 0, num_bytes); + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + + /* + * kernel arguments + */ + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData); + checkError(status, "Failed to set fetch kernel arg"); + + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + + // kernel stores to DDR memory + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData); + checkError(status, 
"Failed to set transpose3D kernel arg"); + + // kernel fetches from DDR memory + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void*)&d_inOutData); + checkError(status, "Failed to set transpose3D kernel arg"); + + mode = WR_GLOBALMEM; + + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + + // kernel stores using SVM based PCIe to host + status = clSetKernelArgSVMPointer(store_kernel, 0, (void*)h_outData); + checkError(status, "Failed to set store kernel arg"); + + cl_event startExec_event, endExec_event; + status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + mode = RD_GLOBALMEM; + + status = clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); + checkError(status, "Failed to launch fetch kernel"); + + status = clFinish(queue7); + checkError(status, "failed to finish"); 
+ status = clFinish(queue6); + checkError(status, "failed to finish"); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue4); + checkError(status, "failed to finish"); + status = clFinish(queue3); + checkError(status, "failed to finish"); + status = clFinish(queue2); + checkError(status, "failed to finish"); + status = clFinish(queue1); + checkError(status, "failed to finish"); + + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + + double svm_copyout_t = 0.0; + svm_copyout_t = getTimeinMilliSec(); + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData, sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + memcpy(out, h_outData, num_bytes); + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData, 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + fft_time.svm_copyout_t += getTimeinMilliSec() - svm_copyout_t; + + if (h_inData) + clSVMFree(context, h_inData); + if (h_outData) + clSVMFree(context, h_outData); + + queue_cleanup(); + + if (d_inOutData) + clReleaseMemObject(d_inOutData); + + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(fftc_kernel) + clReleaseKernel(fftc_kernel); + + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + + if(transpose3D_kernel) + clReleaseKernel(transpose3D_kernel); + + if(store_kernel) + clReleaseKernel(store_kernel); + + fft_time.valid = true; + return fft_time; +} + + +/** + * \brief compute an batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose and for data 
transfers between host's main memory and FPGA using Shared Virtual Memory + * \param N : integer pointer addressing the size of FFT3d + * \param inp : float2 pointer to input data of size [N * N * N] + * \param out : float2 pointer to output data of size [N * N * N] + * \param inv : int toggle to activate backward FFT + * \param how_many : number of batched computations + * \return fpga_t : time taken in milliseconds for data transfers and execution + */ +fpga_t fftfpgaf_c2c_3d_ddr_svm_batch(const unsigned N, const float2 *inp, float2 *out, const bool inv, const unsigned how_many) { + fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0.0, false}; + cl_int status = 0; + // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 2 - BATCH + int mode_transpose = WR_GLOBALMEM; + + // if N is not a power of 2 + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 0) || !svm_enabled){ + return fft_time; + } + + // Can't pass bool to device, so convert it to int + int inverse_int = (int)inv; + + // Setup kernels + cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); + cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); + checkError(status, "Failed to create fft3da kernel"); + cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); + checkError(status, "Failed to create transpose kernel"); + cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); + checkError(status, "Failed to create fft3db kernel"); + cl_kernel transpose3D_kernel = clCreateKernel(program, "transpose3D", &status); + checkError(status, "Failed to create transpose3D kernel"); + + cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); + checkError(status, "Failed to create fft3dc kernel"); + cl_kernel store_kernel = clCreateKernel(program, "store", &status); + checkError(status, "Failed to create store kernel"); + + // Setup Queues to the kernels + queue_setup(); + + // Device memory buffers: 
double buffers + unsigned num_pts = N * N * N; + + cl_mem d_inOutData_0 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + cl_mem d_inOutData_1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + checkError(status, "Failed to allocate output device buffer\n"); + + // allocate and initialize SVM buffers + double svm_copyin_t = 0.0; + float2 *h_inData[how_many], *h_outData[how_many]; + for(size_t i = 0; i < how_many; i++){ + + h_inData[i] = (float2 *)clSVMAlloc(context, CL_MEM_READ_ONLY, sizeof(float2) * num_pts, 0); + h_outData[i] = (float2 *)clSVMAlloc(context, CL_MEM_WRITE_ONLY, sizeof(float2) * num_pts, 0); + + size_t num_bytes = num_pts * sizeof(float2); + + svm_copyin_t = getTimeinMilliSec(); + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_inData[i], sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // copy data into h_inData + memcpy(&h_inData[i][0], &inp[i*num_pts], num_bytes); + + status = clEnqueueSVMUnmap(queue1, (void *)h_inData[i], 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + fft_time.svm_copyin_t += getTimeinMilliSec() - svm_copyin_t; + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_WRITE, (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map input data"); + + // set h_outData to 0 + memset(&h_outData[i][0], 0, num_bytes); + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData[i], 0, NULL, NULL); + checkError(status, "Failed to unmap input data"); + } + + /* + * kernel arguments + */ + // write to fetch kernel using SVM based PCIe + status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData[0]); + checkError(status, "Failed to set fetch1 kernel arg"); + + status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), 
(void*)&inverse_int); + checkError(status, "Failed to set ffta kernel arg"); + // transpose() has no arguments + status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftb kernel arg"); + + // kernel stores to DDR memory + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void*)&d_inOutData_1); + checkError(status, "Failed to set transpose3D kernel arg"); + + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void*)&d_inOutData_0); + checkError(status, "Failed to set transpose3D kernel arg"); + + mode_transpose = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode_transpose); + checkError(status, "Failed to set transpose3D kernel arg"); + + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); + checkError(status, "Failed to set fftc kernel arg"); + + cl_event startExec_event, endExec_event; + /* + * First batch write phase + */ + fft_time.exec_t = getTimeinMilliSec(); + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); + checkError(status, "Failed to launch fetch kernel"); + + status = clFinish(queue1); + checkError(status, "Failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "Failed to finish queue2"); + status = clFinish(queue3); + checkError(status, "Failed to finish queue3"); + status = clFinish(queue4); + checkError(status, "Failed to finish queue4"); + status = 
clFinish(queue5); + checkError(status, "Failed to finish queue5"); + + for(size_t i = 1; i < how_many; i++){ + + status = clSetKernelArgSVMPointer(fetch_kernel, 0, (void *)h_inData[i]); + checkError(status, "Failed to set fetch kernel arg"); + + status = clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), ((i % 2) == 1) ? (void*)&d_inOutData_0 : (void*)&d_inOutData_1); + checkError(status, "Failed to set transpose3D kernel arg 0"); + + status = clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), ((i % 2) == 1) ? (void*)&d_inOutData_1 : (void*)&d_inOutData_0); + checkError(status, "Failed to set transpose3D kernel arg 1"); + + mode_transpose = BATCH; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode_transpose); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status = clSetKernelArgSVMPointer(store_kernel, 0, (void *)h_outData[i-1]); + checkError(status, "Failed to set store kernel arg"); + + // Enqueue Tasks + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose3D kernel"); + + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); + + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + + status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue7, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch store kernel"); + + status = clFinish(queue1); + checkError(status, "Failed to finish queue1"); + status = clFinish(queue2); + checkError(status, "Failed to finish queue2"); + status = 
clFinish(queue3); + checkError(status, "Failed to finish queue3"); + status = clFinish(queue4); + checkError(status, "Failed to finish queue4"); + status = clFinish(queue5); + checkError(status, "Failed to finish queue5"); + status = clFinish(queue6); + checkError(status, "Failed to finish queue6"); + status = clFinish(queue7); + checkError(status, "Failed to finish queue7"); + } + + status = clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), ((how_many % 2) == 0) ? (void*)&d_inOutData_1 : (void*)&d_inOutData_0); + checkError(status, "Failed to set transpose3D kernel arg 0"); + + status = clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), ((how_many % 2) == 0) ? (void*)&d_inOutData_0 : (void*)&d_inOutData_1); + checkError(status, "Failed to set transpose3D kernel arg 1"); + + mode_transpose = RD_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode_transpose); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose3D kernel"); + + status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clSetKernelArgSVMPointer(store_kernel, 0, (void *)h_outData[how_many - 1]); + checkError(status, "Failed to set store kernel arg"); + status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); + checkError(status, "Failed to launch store kernel"); + + status = clFinish(queue5); + checkError(status, "Failed to finish queue5"); + status = clFinish(queue6); + checkError(status, "Failed to finish queue6"); + status = clFinish(queue7); + checkError(status, "Failed to finish queue7"); + + fft_time.exec_t = getTimeinMilliSec() - fft_time.exec_t; + + cl_ulong kernel_start = 0, kernel_end = 0; + + clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); + clGetEventProfilingInfo(endExec_event, 
CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end, NULL); + + fft_time.exec_t = (cl_double)(kernel_end - kernel_start) * (cl_double)(1e-06); + + double svm_copyout_t = 0.0; + for(size_t i = 0; i < how_many; i++){ + + // copy data into h_outData + size_t num_bytes = num_pts * sizeof(float2); + svm_copyout_t = getTimeinMilliSec(); + + status = clEnqueueSVMMap(queue1, CL_TRUE, CL_MAP_READ, + (void *)h_outData[i], sizeof(float2) * num_pts, 0, NULL, NULL); + checkError(status, "Failed to map out data"); + + memcpy(&out[i*num_pts], &h_outData[i][0], num_bytes); + + status = clEnqueueSVMUnmap(queue1, (void *)h_outData[i], 0, NULL, NULL); + checkError(status, "Failed to unmap out data"); + fft_time.svm_copyout_t += getTimeinMilliSec() - svm_copyout_t; + } + + for(size_t i = 0; i < how_many; i++){ + clSVMFree(context, h_inData[i]); + clSVMFree(context, h_outData[i]); + } + + queue_cleanup(); + + if (d_inOutData_0) + clReleaseMemObject(d_inOutData_0); + if (d_inOutData_1) + clReleaseMemObject(d_inOutData_1); + + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + + if(ffta_kernel) + clReleaseKernel(ffta_kernel); + if(fftb_kernel) + clReleaseKernel(fftb_kernel); + if(fftc_kernel) + clReleaseKernel(fftc_kernel); + + if(transpose_kernel) + clReleaseKernel(transpose_kernel); + + if(transpose3D_kernel) + clReleaseKernel(transpose3D_kernel); + + if(store_kernel) + clReleaseKernel(store_kernel); + + fft_time.valid = true; + return fft_time; +} \ No newline at end of file diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 711cd43..489a5f7 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -89,21 +89,21 @@ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ cl_uint num_devices; devices = getDevices(platform, CL_DEVICE_TYPE_ALL, &num_devices); // Unable to find device for the OpenCL platform - printf("\n\t%u devices found\n", num_devices); + printf("\n-- %u devices found\n", num_devices); if(devices == NULL){ return -3; } // use the first 
device. device = devices[0]; - printf("\t\tChoosing first device by default\n"); + printf("\tChoosing first device by default\n"); if(use_svm){ if(!check_valid_svm_device(device)){ return -5; } else{ - printf("\t\tDevice supports SVM \n"); + printf("-- Device supports SVM \n"); svm_enabled = true; } } @@ -112,7 +112,7 @@ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ context = clCreateContext(NULL, 1, &device, NULL, NULL, &status); checkError(status, "Failed to create context"); - printf("\n\tGetting program binary from path: %s\n", path); + printf("\n-- Getting program binary from path: %s\n", path); // Create the program. program = getProgramWithBinary(context, &device, 1, path); if(program == NULL) { @@ -121,7 +121,7 @@ int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ return -4; } - printf("\tBuilding the program\n\n"); + printf("-- Building the program\n\n"); // Build the program that was just created. status = clBuildProgram(program, 0, NULL, "", NULL, NULL); checkError(status, "Failed to build program"); @@ -145,7 +145,6 @@ void fpga_final(){ * \brief Create a command queue for each kernel */ void queue_setup(){ - printf("-- Creating queues\n"); cl_int status = 0; // Create one command queue for each kernel. 
queue1 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); @@ -170,7 +169,6 @@ void queue_setup(){ * \brief Release all command queues */ void queue_cleanup() { - printf("-- Destroying queues\n"); if(queue1) clReleaseCommandQueue(queue1); if(queue2) diff --git a/api/src/opencl_utils.c b/api/src/opencl_utils.c index f5c2677..de2bc8b 100755 --- a/api/src/opencl_utils.c +++ b/api/src/opencl_utils.c @@ -50,7 +50,7 @@ cl_platform_id findPlatform(const char *platform_name){ char name_search[pl_len + 1]; // VLA tolowercase(platform_name, name_search); - printf("\t%d platforms found\n", num_platforms); + printf("-- %d platforms found\n", num_platforms); // Search the platforms for the platform name passed as argument for(int i = 0; i < num_platforms; i++){ @@ -73,7 +73,7 @@ cl_platform_id findPlatform(const char *platform_name){ } tolowercase(plat_name, plat_name_lc); - printf("\t\t%d: %s\n", i, plat_name_lc); + printf("\t%d: %s\n", i, plat_name_lc); if( strstr(plat_name_lc, name_search)){ cl_platform_id pid = pids[i]; free(pids); diff --git a/cmake/genKernelTargets.cmake b/cmake/genKernelTargets.cmake index 03410a2..4fc72b5 100644 --- a/cmake/genKernelTargets.cmake +++ b/cmake/genKernelTargets.cmake @@ -17,17 +17,17 @@ function(gen_fft_targets) set(CL_HEADER "${CMAKE_BINARY_DIR}/kernels/common/fft_config.h") set(EMU_BSTREAM - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/emu_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocx") + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FPGA_BOARD_NAME}/emulation/${kernel_fname}_${FFT_SIZE}_${BURST}/${kernel_fname}.aocx") set(REP_BSTREAM - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/rep_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocr") + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FPGA_BOARD_NAME}/reports/${kernel_fname}_${FFT_SIZE}_${BURST}/${kernel_fname}.aocr") set(PROF_BSTREAM - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/prof_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocx") + 
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FPGA_BOARD_NAME}/profile/${kernel_fname}_${FFT_SIZE}_${BURST}/${kernel_fname}.aocx") set(SYN_BSTREAM - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${BSP_VERSION}/syn_${FFT_SIZE}_${kernel_fname}/${kernel_fname}.aocx") + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FPGA_BOARD_NAME}/${SDK_VERSION}sdk_${BSP_VERSION}bsp/${kernel_fname}_${BURST}/${kernel_fname}_${FFT_SIZE}.aocx") # Emulation Target add_custom_command(OUTPUT ${EMU_BSTREAM} - COMMAND ${IntelFPGAOpenCL_AOC} ${CL_SRC} ${CL_INCL_DIR} ${AOC_FLAGS} ${EMU_FLAGS} -board=${FPGA_BOARD_NAME} -o ${EMU_BSTREAM} + COMMAND ${IntelFPGAOpenCL_AOC} ${CL_SRC} ${CL_INCL_DIR} ${AOC_FLAGS} ${EMU_FLAGS} -o ${EMU_BSTREAM} MAIN_DEPENDENCY ${CL_SRC} VERBATIM ) diff --git a/examples/fft.cpp b/examples/fft.cpp index f0cfe00..7f3ef1c 100644 --- a/examples/fft.cpp +++ b/examples/fft.cpp @@ -17,8 +17,8 @@ int main(int argc, char* argv[]){ else platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - bool use_svm = false; - int isInit = fpga_initialize(platform, config.path.data(), use_svm); + bool use_usm = false; + int isInit = fpga_initialize(platform, config.path.data(), use_usm); if(isInit != 0){ cerr << "FPGA initialization error\n"; return EXIT_FAILURE; @@ -52,6 +52,13 @@ int main(int argc, char* argv[]){ runtime[i] = fftfpgaf_c2c_3d_bram(num, inp, out, inv, burst); else if(!config.use_bram && (config.batch > 1)) runtime[i] = fftfpgaf_c2c_3d_ddr_batch(num, inp, out, inv, burst, config.batch); + else if (use_usm == 1){ + if(config.batch > 1) + runtime[i] = fftfpgaf_c2c_3d_ddr_svm_batch(num, inp, out, inv, config.batch); + else + runtime[i] = fftfpgaf_c2c_3d_ddr_svm(num, inp, out, inv, burst); + break; + } else runtime[i] = fftfpgaf_c2c_3d_ddr(num, inp, out, inv); break; diff --git a/examples/helper.cpp b/examples/helper.cpp index 72a5e01..39f948f 100644 --- a/examples/helper.cpp +++ b/examples/helper.cpp @@ -31,6 +31,14 @@ void create_data(float2 *inp, const unsigned num){ inp[i].x = (float)((float)rand() / 
(float)RAND_MAX); inp[i].y = (float)((float)rand() / (float)RAND_MAX); } + + /* + printf("Creating Data\n"); + for(unsigned i = 0; i < num; i++){ + printf("%u: (%f, %f)\n", i, inp[i].x, inp[i].y); + } + printf("\n"); + */ } /** @@ -52,6 +60,7 @@ void parse_args(int argc, char* argv[], CONFIG &config){ ("c, batch", "Number of batches of FFT calculations in FPGA", cxxopts::value()->default_value("1") ) ("t, burst", "Toggle to use burst interleaved global memory accesses in FPGA", cxxopts::value()->default_value("false") ) ("m, use_bram", "Toggle to use BRAM instead of DDR for 3D Transpose ", cxxopts::value()->default_value("false") ) + ("s, use_usm", "Toggle to use Unified Shared Memory features for data transfers between host and device", cxxopts::value()->default_value("false") ) ("e, emulate", "Toggle to enable emulation ", cxxopts::value()->default_value("false") ) ("h,help", "Print usage"); auto opt = options.parse(argc, argv); @@ -71,6 +80,7 @@ void parse_args(int argc, char* argv[], CONFIG &config){ config.burst = opt["burst"].as(); config.use_bram = opt["use_bram"].as(); config.emulate = opt["emulate"].as(); + config.use_usm = opt["use_usm"].as(); if(opt.count("path")){ config.path = opt["path"].as(); @@ -98,6 +108,7 @@ void print_config(CONFIG config){ printf("Transpose3D : %s \n", config.use_bram ? "BRAM":"DDR"); printf("Burst Interleaving : %s \n", config.burst ? "Yes":"No"); printf("Emulation : %s \n", config.emulate ? "Yes":"No"); + printf("USM Feature : %s \n", config.use_usm ? 
"Yes":"No"); printf("--------------------------------------------\n\n"); } @@ -221,9 +232,9 @@ void perf_measures(const CONFIG config, fpga_t *runtime){ variance.pcie_read_t += pow(runtime[i].pcie_read_t - avg_runtime.pcie_read_t, 2); variance.pcie_write_t += pow(runtime[i].pcie_write_t - avg_runtime.pcie_write_t, 2); } - sd.exec_t = variance.exec_t / config.iter; - sd.pcie_read_t = variance.pcie_read_t / config.iter; - sd.pcie_write_t = variance.pcie_write_t / config.iter; + sd.exec_t = sqrt(variance.exec_t / config.iter); + sd.pcie_read_t = sqrt(variance.pcie_read_t / config.iter); + sd.pcie_write_t = sqrt(variance.pcie_write_t / config.iter); double avg_total_runtime = avg_runtime.exec_t + avg_runtime.pcie_write_t + avg_runtime.pcie_read_t; @@ -245,7 +256,7 @@ void perf_measures(const CONFIG config, fpga_t *runtime){ printf("Throughput = %.4lfGFLOPS/s | %.4lf GB/s\n", gflops, gBytes_per_sec); if(config.iter > 1){ printf("\n"); - printf("%s", config.iter>1 ? "Standard Deviations of iterations\n":""); + printf("%s", config.iter>1 ? 
"Deviation of runtimes among iterations\n":""); printf("PCIe Write = %.4lfms\n", sd.pcie_write_t); printf("Kernel Execution = %.4lfms\n", sd.exec_t); printf("PCIe Read = %.4lfms\n", sd.pcie_read_t); diff --git a/examples/helper.hpp b/examples/helper.hpp index 0c72191..1fffdd3 100644 --- a/examples/helper.hpp +++ b/examples/helper.hpp @@ -15,6 +15,7 @@ struct CONFIG{ bool burst; bool use_bram; bool emulate; + bool use_usm; }; void parse_args(int argc, char* argv[], CONFIG &config); diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index 2276f21..4d6ed0c 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -1,12 +1,31 @@ # Arjun Ramaswami cmake_minimum_required(VERSION 3.10) -project(fftkernelsfpga VERSION 0.2 +project(fftkernelsfpga VERSION 2.0 DESCRIPTION "Kernels for FFT using FPGAs" LANGUAGES C CXX) -# OpenCL kernel targets generation -## setup cmake variables to generate header file +set(FPGA_BOARD_NAME $ENV{FPGA_BOARD_NAME} CACHE STRING "Target Board Name") +if(DEFINED FPGA_BOARD_NAME) + message("-- FPGA Board Name: ${FPGA_BOARD_NAME}") +else() + message(ERROR, "No Target board found") +endif() + +set(SDK_VERSION $ENV{QUARTUS_VERSION} CACHE STRING "SDK Version") +if(SDK_VERSION) + message("-- SDK Version: ${SDK_VERSION}") +else() + message(ERROR, "No SDK Version Found") +endif() + +set(BSP_VERSION $ENV{QUARTUS_VERSION_BSP} CACHE STRING "BSP Version") +if(BSP_VERSION) + message("-- BSP Version: ${BSP_VERSION}") +else() + message(ERROR, "No BSP Found") +endif() + set(LOG_POINTS 3 CACHE STRING "Log of per sample data points") math(EXPR POINTS "1 << ${LOG_POINTS}") @@ -16,15 +35,33 @@ math(EXPR FFT_SIZE "1 << ${LOG_FFT_SIZE}") message("-- FFT size is ${FFT_SIZE}") math(EXPR DEPTH "1 << (${LOG_FFT_SIZE} + ${LOG_FFT_SIZE} - ${LOG_POINTS})") -#set(BUF_LOC "DDR") -set(DDR_BUFFER_LOCATION "DDR" CACHE STRING "Buffer location of 3d Transpose") -set_property(CACHE DDR_BUFFER_LOCATION PROPERTY STRINGS "DDR" "device") +set(BURST_INTERLEAVING CACHE 
BOOL "Enable burst interleaving") +if(BURST_INTERLEAVING) + set(INTERLEAVING "") + set(BURST "burstinter") + message("-- Burst interleaved global memory accesses") +else() + set(INTERLEAVING "-no-interleaving=default") + set(BURST "nointer") + message("-- Non-interleaved global memory accesses") +endif() -set(SVM_HOST_BUFFER_LOCATION "" CACHE STRING "SVM host buffer location") +if(${FPGA_BOARD_NAME} STREQUAL "pac_s10_usm") + set(DDR_BUFFER_LOCATION "device" CACHE STRING "Buffer location of 3d Transpose") + set(SVM_HOST_BUFFER_LOCATION "host" CACHE STRING "SVM host buffer location") +elseif(${FPGA_BOARD_NAME} STREQUAL "p520_hpc_sg280l") + set(DDR_BUFFER_LOCATION "DDR" CACHE STRING "Buffer location of 3d Transpose") + set(SVM_HOST_BUFFER_LOCATION "" CACHE STRING "SVM host buffer location") +else() + set(DDR_BUFFER_LOCATION "" CACHE STRING "Buffer location of 3d Transpose") + set(SVM_HOST_BUFFER_LOCATION "" CACHE STRING "SVM host buffer location") +endif() + +set_property(CACHE DDR_BUFFER_LOCATION PROPERTY STRINGS "DDR" "device" "") set_property(CACHE SVM_HOST_BUFFER_LOCATION PROPERTY STRINGS "" "host") -message("-- Buffer location for 3d Transpose is ${DDR_BUFFER_LOCATION}") -message("-- SVM host Buffer location ${SVM_BUFFER_LOCATION}") +message("-- Buffer location for 3d Transpose: ${DDR_BUFFER_LOCATION}") +message("-- SVM host Buffer location: ${SVM_HOST_BUFFER_LOCATION}") configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/common/fft_config.h.in" @@ -32,27 +69,8 @@ configure_file( ESCAPE_QUOTES ) -## -# Get FPGA Board Name from env variable or use default -# Set it to CMakeCache -## -if(NOT DEFINED FPGA_BOARD_NAME) - if(DEFINED $ENV{FPGA_BOARD_NAME}) - set(FPGA_BOARD_NAME $ENV{FPGA_BOARD_NAME} CACHE STRING "Target Board") - else() - set(FPGA_BOARD_NAME p520_hpc_sg280l CACHE STRING "Target Board") - endif() -endif() - -if(${FPGA_BOARD_NAME} STREQUAL "pac_s10_usm") - set(BOARD_PACKAGE 
"-board-package=/cm/shared/opt/intel_oneapi/beta-10/intelfpgadpcpp/2021.1-beta10/board/intel_s10sx_pac_usm/") - message("-- Board Package: ${BOARD_PACKAGE}") -else() - set(BOARD_PACKAGE "") -endif() - ## Flags for different target options -set(AOC_FLAGS "-g -v -no-interleaving=default ${BOARD_PACKAGE}" CACHE STRING "AOC compiler flags") +set(AOC_FLAGS "-g -v ${INTERLEAVING} -board=${FPGA_BOARD_NAME}" CACHE STRING "AOC compiler flags") separate_arguments(AOC_FLAGS) set(EMU_FLAGS "-march=emulator" CACHE STRING "AOC emulation flags") separate_arguments(EMU_FLAGS) @@ -61,13 +79,6 @@ separate_arguments(REP_FLAGS) set(PROF_FLAGS "-profile=all" CACHE STRING "AOC profile flags") separate_arguments(PROF_FLAGS) -if(DEFINED ENV{LMOD_FAMILY_BSP_VERSION}) - set(BSP_VERSION "$ENV{LMOD_FAMILY_BSP_VERSION}") - message("-- BSP Version: ${BSP_VERSION}") -else() - message(ERROR, "No BSP Version Found") -endif() - if (INTELFPGAOPENCL_FOUND) add_subdirectory(fft1d) add_subdirectory(fft2d) diff --git a/kernels/fft3d/CMakeLists.txt b/kernels/fft3d/CMakeLists.txt index c48ebc9..7d8c307 100644 --- a/kernels/fft3d/CMakeLists.txt +++ b/kernels/fft3d/CMakeLists.txt @@ -9,7 +9,7 @@ cmake_minimum_required(VERSION 3.10) # - ${kernel_name}_syn: to generate synthesis binary ## set(CL_PATH "${fftkernelsfpga_SOURCE_DIR}/fft3d") -set(kernels fft3d_bram fft3d_ddr fft3d_ddr_batch) +set(kernels fft3d_bram fft3d_ddr fft3d_ddr_batch fft3d_ddr_svm) include(${fft_SOURCE_DIR}/cmake/genKernelTargets.cmake) From d607e4245a9b867cf728cc73df5146af80fafb14 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 8 Oct 2021 12:34:58 +0200 Subject: [PATCH 48/76] identical fft3d ddr and svm kernels --- api/include/fftfpga/fftfpga.h | 4 +- api/src/fft3d.c | 451 ++++++++++++++++++---------------- api/src/fft3d_svm.c | 13 +- examples/fft.cpp | 9 +- kernels/fft3d/fft3d_ddr.cl | 308 ++++++++++++----------- 5 files changed, 409 insertions(+), 376 deletions(-) diff --git a/api/include/fftfpga/fftfpga.h 
b/api/include/fftfpga/fftfpga.h index 33b59d7..943aa30 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -159,9 +159,9 @@ extern fpga_t fftfpgaf_c2c_3d_bram(const unsigned N, const float2 *inp, float2 * * @param inv : int toggle to activate backward FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv); +extern fpga_t fftfpgaf_c2c_3d_ddr(const unsigned N, const float2 *inp, float2 *out, const bool inv); -extern fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int how_many); +extern fpga_t fftfpgaf_c2c_3d_ddr_batch(const unsigned N, const float2 *inp, float2 *out, const bool inv, const bool interleaving, const unsigned how_many); /** * @brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA and Shared Virtual Memory for Host to Device Communication diff --git a/api/src/fft3d.c b/api/src/fft3d.c index e88e5d0..f2ced99 100644 --- a/api/src/fft3d.c +++ b/api/src/fft3d.c @@ -14,15 +14,17 @@ #include "opencl_utils.h" #include "misc.h" +#define WR_GLOBALMEM 0 +#define RD_GLOBALMEM 1 #define BATCH 2 /** * \brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA - * \param N : integer pointer addressing the size of FFT3d + * \param N : unsigned integer denoting the size of FFT3d * \param inp : float2 pointer to input data of size [N * N * N] * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param interleaving : 1 if using burst interleaved global memory buffers + * \param inv : toggle to activate backward FFT + * \param interleaving : toggle to use burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_3d_bram(const unsigned N, const float2 *inp, float2 
*out, const bool inv, const bool interleaving) { @@ -38,10 +40,6 @@ fpga_t fftfpgaf_c2c_3d_bram(const unsigned N, const float2 *inp, float2 *out, co return fft_time; } -#ifdef VERBOSE - printf("Launching%s 3d FFT transform using BRAM for 3D Transpose\n", inv ? " inverse":""); -#endif - queue_setup(); cl_mem_flags flagbuf1, flagbuf2; @@ -199,48 +197,43 @@ fpga_t fftfpgaf_c2c_3d_bram(const unsigned N, const float2 *inp, float2 *out, co /** * \brief compute an out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose - * \param N : integer pointer addressing the size of FFT3d + * \param N : unsigned integer denoting the size of FFT3d * \param inp : float2 pointer to input data of size [N * N * N] * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param interleaving : 1 if using burst interleaved global memory buffers + * \param inv : toggle to activate backward FFT + * \param interleaving : toggle to use burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { +fpga_t fftfpgaf_c2c_3d_ddr(const unsigned N, const float2 *inp, float2 *out, const bool inv) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; - int num_pts = N * N * N; + unsigned num_pts = N * N * N; + // 0 - WR_GLOBALMEM, 1 - RD_GLOBALMEM, 2 - BATCH + int mode = WR_GLOBALMEM; // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ return fft_time; } -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); -#endif - // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetch1", &status); - checkError(status, "Failed to create fetch1 kernel"); + cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); checkError(status, "Failed to create fft3da kernel"); cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); checkError(status, "Failed to create transpose kernel"); cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); - checkError(status, "Failed to create store1 kernel"); - - cl_kernel fetch2_kernel = clCreateKernel(program, "fetch2", &status); - checkError(status, "Failed to create fetch2 kernel"); + cl_kernel transpose3D_kernel = clCreateKernel(program, "transpose3D", &status); + checkError(status, "Failed to create transpose3D kernel"); cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); - checkError(status, "Failed to create store2 kernel"); + cl_kernel store_kernel = clCreateKernel(program, "store", &status); + checkError(status, "Failed to create store kernel"); // Setup Queues to the kernels queue_setup(); @@ -271,39 +264,48 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { fft_time.pcie_write_t = (cl_double)(writeBuf_end - writeBuf_start) * (cl_double)(1e-06); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData); - checkError(status, "Failed to set fetch1 kernel arg"); + status=clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData); + 
checkError(status, "Failed to set fetch kernel arg"); status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set ffta kernel arg"); status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftb kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); + checkError(status, "Failed to set transpose3D kernel arg 0"); + + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void *)&d_transpose); + checkError(status, "Failed to set transpose3D kernel arg 1"); + + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose); - checkError(status, "Failed to set fetch2 kernel arg"); status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftc kernel arg"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData); + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData); checkError(status, "Failed to set store2 kernel arg"); // Kernel Execution cl_event startExec_event, endExec_event; - status = clEnqueueTask(queue7, store2_kernel, 0, NULL, &endExec_event); + status = clEnqueueTask(queue7, store_kernel, 0, NULL, &endExec_event); checkError(status, "Failed to launch transpose kernel"); status = clEnqueueTask(queue6, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, 
"Failed to launch write of transpose3d kernel"); // enqueue fetch to same queue as the store kernel due to data dependency // therefore, not swapped - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + mode = RD_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch read of transpose3d kernel"); status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); @@ -314,7 +316,7 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, &startExec_event); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, &startExec_event); checkError(status, "Failed to launch fetch kernel"); status = clFinish(queue1); @@ -360,10 +362,10 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { if (d_transpose) clReleaseMemObject(d_transpose); - if(fetch1_kernel) - clReleaseKernel(fetch1_kernel); - if(fetch2_kernel) - clReleaseKernel(fetch2_kernel); + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(transpose3D_kernel) + clReleaseKernel(transpose3D_kernel); if(ffta_kernel) clReleaseKernel(ffta_kernel); @@ -375,10 +377,8 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool inv) { if(transpose_kernel) clReleaseKernel(transpose_kernel); - if(store1_kernel) - clReleaseKernel(store1_kernel); - if(store2_kernel) - clReleaseKernel(store2_kernel); + if(store_kernel) + clReleaseKernel(store_kernel); fft_time.valid = 1; return fft_time; @@ -386,112 +386,106 @@ fpga_t fftfpgaf_c2c_3d_ddr(int N, const float2 *inp, float2 *out, bool 
inv) { /** * \brief compute an batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose - * \param N : integer pointer addressing the size of FFT3d + * \param N : unsigned integer denoting the size of FFT3d * \param inp : float2 pointer to input data of size [N * N * N] * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT + * \param inv : toggle to activate backward FFT * \param interleaving : enable burst interleaved global memory buffers * \param how_many : number of batched computations * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int how_many) { +fpga_t fftfpgaf_c2c_3d_ddr_batch(const unsigned N, const float2 *inp, float2 *out, const bool inv, const bool interleaving, const unsigned how_many) { fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; - int num_pts = N * N * N; + unsigned num_pts = N * N * N; + int mode = WR_GLOBALMEM; // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (how_many <= 1)){ return fft_time; } -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR for Batched execution\n", inv ? 
" inverse":""); -#endif - // Can't pass bool to device, so convert it to int - int inverse_int = (int)inv; + const int inverse_int = (int)inv; // Setup kernels - cl_kernel fetch1_kernel = clCreateKernel(program, "fetch1", &status); - checkError(status, "Failed to create fetch1 kernel"); + cl_kernel fetch_kernel = clCreateKernel(program, "fetch", &status); + checkError(status, "Failed to create fetch kernel"); cl_kernel ffta_kernel = clCreateKernel(program, "fft3da", &status); checkError(status, "Failed to create fft3da kernel"); cl_kernel transpose_kernel = clCreateKernel(program, "transpose", &status); checkError(status, "Failed to create transpose kernel"); cl_kernel fftb_kernel = clCreateKernel(program, "fft3db", &status); checkError(status, "Failed to create fft3db kernel"); - cl_kernel store1_kernel = clCreateKernel(program, "transposeStore1", &status); - checkError(status, "Failed to create store1 kernel"); - - cl_kernel fetch2_kernel = clCreateKernel(program, "fetch2", &status); - checkError(status, "Failed to create fetch2 kernel"); + cl_kernel transpose3D_kernel = clCreateKernel(program, "transpose3D", &status); + checkError(status, "Failed to create transpose3D kernel"); cl_kernel fftc_kernel = clCreateKernel(program, "fft3dc", &status); checkError(status, "Failed to create fft3dc kernel"); - cl_kernel store2_kernel = clCreateKernel(program, "transposeStore2", &status); - checkError(status, "Failed to create store2 kernel"); + cl_kernel store_kernel = clCreateKernel(program, "store", &status); + checkError(status, "Failed to create store kernel"); // Setup Queues to the kernels queue_setup(); // Device memory buffers: using 1st and 2nd banks // Double Buffers, using 3rd and 4th banks - // a and b are double buffers - cl_mem d_inData1, d_inData2, d_inData3, d_inData4; - cl_mem d_outData1, d_outData2, d_outData3, d_outData4; - - d_inData1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem 
d_inData1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - d_inData2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_inData2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - d_inData3 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_inData3 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - d_inData4 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_4_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_inData4 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_CHANNEL_4_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate input device buffer\n"); - d_outData1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_outData1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - d_outData2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_outData2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - d_outData3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_outData3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_3_INTELFPGA, 
sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - d_outData4 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_4_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_outData4 = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_CHANNEL_4_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - cl_mem d_transpose1, d_transpose2, d_transpose3, d_transpose4; - d_transpose1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_transpose1 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_1_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - d_transpose2 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_transpose2 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - d_transpose3 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_transpose3 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_3_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); - d_transpose4 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_4_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); + cl_mem d_transpose4 = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_4_INTELFPGA, sizeof(float2) * num_pts, NULL, &status); checkError(status, "Failed to allocate output device buffer\n"); // Default Kernel Arguments - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); - checkError(status, "Failed to set fetch1 kernel arg"); + status=clSetKernelArg(fetch_kernel, 
0, sizeof(cl_mem), (void *)&d_inData1); + checkError(status, "Failed to set fetch kernel arg"); status=clSetKernelArg(ffta_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set ffta kernel arg"); status=clSetKernelArg(fftb_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftb kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); - checkError(status, "Failed to set store1 kernel arg"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); - checkError(status, "Failed to set fetch2 kernel arg"); + /* Write into the mem and read from the same. Mode is at first write */ + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set transpose3D_kernel kernel arg 0"); + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set transpose3D_kernel kernel arg 1"); + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + status=clSetKernelArg(fftc_kernel, 0, sizeof(cl_int), (void*)&inverse_int); checkError(status, "Failed to set fftc kernel arg"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); - checkError(status, "Failed to set store2 kernel arg"); + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); + checkError(status, "Failed to set store kernel arg"); fft_time.exec_t = getTimeinMilliSec(); @@ -505,12 +499,17 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv // Second Phase // Unblocking write to DDR second buffer from index num_pts cl_event write_event[2]; - //status = clEnqueueWriteBuffer(queue6, d_inData2, CL_TRUE, 0, sizeof(float2) * num_pts, (void*)&inp[num_pts], 0, NULL, NULL); status = clEnqueueWriteBuffer(queue6, 
d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, (void*)&inp[num_pts], 0, NULL, &write_event[0]); checkError(status, "Failed to write to DDR buffer"); // Compute First FFT already transferred - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue3, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); @@ -522,32 +521,30 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose3D_kernel kernel"); - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); + mode = RD_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose3D_kernel kernel"); // Check finish of transfer and computations clWaitForEvents(1, &write_event[0]); clReleaseEvent(write_event[0]); - status = clFinish(queue6); - checkError(status, "failed to 
finish"); - status = clFinish(queue5); + status = clFinish(queue1); checkError(status, "failed to finish"); - status = clFinish(queue4); + status = clFinish(queue2); checkError(status, "failed to finish"); status = clFinish(queue3); checkError(status, "failed to finish"); - status = clFinish(queue2); + status = clFinish(queue4); checkError(status, "failed to finish"); - status = clFinish(queue1); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue6); checkError(status, "failed to finish"); // Loop over the 3 stages @@ -561,15 +558,20 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(i * num_pts)], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); + status=clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); checkError(status, "Failed to set fetch1 kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); + checkError(status, "Failed to set transpose3D_kernel kernel arg 0"); + + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void *)&d_transpose4); + checkError(status, "Failed to set transpose3D_kernel kernel arg 1"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); checkError(status, "Failed to set store2 kernel arg"); } else if( 
(i % 4) == 1){ @@ -579,16 +581,21 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(i * num_pts)], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData3); - checkError(status, "Failed to set fetch1 kernel arg"); + status=clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData3); + checkError(status, "Failed to set fetch kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); + checkError(status, "Failed to set transpose3D_kernel kernel arg 0"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void *)&d_transpose1); + checkError(status, "Failed to set transpose3D_kernel kernel arg 1"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData3); - checkError(status, "Failed to set store2 kernel arg"); + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData3); + checkError(status, "Failed to set store kernel arg"); } else if( (i % 4) == 2){ status = clEnqueueWriteBuffer(queue7, d_inData1, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[( (i + 2) * num_pts)], 0, NULL, &write_event[1]); @@ -597,16 +604,21 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv status = clEnqueueReadBuffer(queue6, d_outData3, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(i * num_pts)], 0, NULL, &write_event[0]); checkError(status, "Failed to read 
from DDR buffer"); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData4); - checkError(status, "Failed to set fetch1 kernel arg"); + status=clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData4); + checkError(status, "Failed to set fetch kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose2); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose2); + checkError(status, "Failed to set transpose3D_kernel kernel arg 0"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose2); + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void *)&d_transpose2); + checkError(status, "Failed to set transpose3D_kernel kernel arg 1"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData4); - checkError(status, "Failed to set store2 kernel arg"); + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData4); + checkError(status, "Failed to set store kernel arg"); } else{ status = clEnqueueWriteBuffer(queue7, d_inData2, CL_FALSE, 0, sizeof(float2) * num_pts, &inp[( (i+2) * num_pts)], 0, NULL, &write_event[1]); @@ -615,57 +627,65 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv status = clEnqueueReadBuffer(queue6, d_outData4, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(i * num_pts)], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); - checkError(status, "Failed to set fetch1 kernel arg"); + status=clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); + checkError(status, "Failed to set fetch kernel arg"); - 
status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set transpose3D_kernel kernel arg 0"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set transpose3D_kernel kernel arg 1"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); - checkError(status, "Failed to set store2 kernel arg"); + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); ; + + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); + checkError(status, "Failed to set store kernel arg"); } // Set Kernel Arguments before execution - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + status = clEnqueueTask(queue3, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose3D_kernel kernel"); + + mode = RD_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + 
checkError(status, "Failed to launch transpose3D_kernel kernel"); status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); - status = clFinish(queue7); + status = clFinish(queue1); checkError(status, "failed to finish"); - status = clFinish(queue6); + status = clFinish(queue2); checkError(status, "failed to finish"); - status = clFinish(queue5); + status = clFinish(queue3); checkError(status, "failed to finish"); status = clFinish(queue4); checkError(status, "failed to finish"); - status = clFinish(queue3); + status = clFinish(queue5); checkError(status, "failed to finish"); - status = clFinish(queue2); + status = clFinish(queue6); checkError(status, "failed to finish"); - status = clFinish(queue1); + status = clFinish(queue7); checkError(status, "failed to finish"); clWaitForEvents(2, write_event); @@ -677,106 +697,125 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv status = clEnqueueReadBuffer(queue6, d_outData3, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); - 
status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData4); + status=clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData4); checkError(status, "Failed to set fetch1 kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose2); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose2); + checkError(status, "Failed to set transpose3D_kernel kernel arg 0"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose2); - checkError(status, "Failed to set store1 kernel arg"); - - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData4); + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void *)&d_transpose2); + checkError(status, "Failed to set transpose3D_kernel kernel arg 1"); + + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData4); checkError(status, "Failed to set store2 kernel arg"); } else if((how_many % 4) == 1){ status = clEnqueueReadBuffer(queue6, d_outData4, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); + status=clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData1); checkError(status, "Failed to set fetch1 kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); + checkError(status, "Failed to set transpose3D_kernel kernel arg 0"); + + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), 
(void *)&d_transpose3); + checkError(status, "Failed to set transpose3D_kernel kernel arg 1"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose3); - checkError(status, "Failed to set store1 kernel arg"); + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData1); checkError(status, "Failed to set store2 kernel arg"); } else if((how_many % 4) == 2){ status = clEnqueueReadBuffer(queue6, d_outData1, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); + status=clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData2); checkError(status, "Failed to set fetch1 kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); + checkError(status, "Failed to set transpose3D_kernel kernel arg 0"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose4); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void *)&d_transpose4); + checkError(status, "Failed to set transpose3D_kernel kernel arg 1"); + + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData2); checkError(status, 
"Failed to set store2 kernel arg"); } else{ status = clEnqueueReadBuffer(queue6, d_outData2, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 2) * num_pts], 0, NULL, &write_event[0]); checkError(status, "Failed to read from DDR buffer"); - status=clSetKernelArg(fetch1_kernel, 0, sizeof(cl_mem), (void *)&d_inData3); + status=clSetKernelArg(fetch_kernel, 0, sizeof(cl_mem), (void *)&d_inData3); checkError(status, "Failed to set fetch1 kernel arg"); - status=clSetKernelArg(store1_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); - checkError(status, "Failed to set store1 kernel arg"); + status=clSetKernelArg(transpose3D_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); + checkError(status, "Failed to set transpose3D_kernel kernel arg 0"); + + status=clSetKernelArg(transpose3D_kernel, 1, sizeof(cl_mem), (void *)&d_transpose1); + checkError(status, "Failed to set transpose3D_kernel kernel arg 1"); - status=clSetKernelArg(fetch2_kernel, 0, sizeof(cl_mem), (void *)&d_transpose1); - checkError(status, "Failed to set store1 kernel arg"); + mode = WR_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); - status=clSetKernelArg(store2_kernel, 0, sizeof(cl_mem), (void *)&d_outData3); + status=clSetKernelArg(store_kernel, 0, sizeof(cl_mem), (void *)&d_outData3); checkError(status, "Failed to set store2 kernel arg"); } - status = clEnqueueTask(queue1, fetch1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + status = clEnqueueTask(queue3, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = 
clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose3D_kernel kernel"); + + mode = RD_GLOBALMEM; + status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); + checkError(status, "Failed to set transpose3D kernel arg 2"); + + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose3D_kernel kernel"); status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); checkError(status, "Failed to launch second fft kernel"); - status = clEnqueueTask(queue5, store1_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second transpose kernel"); - - status = clEnqueueTask(queue5, fetch2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue3, store2_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); - status = clFinish(queue6); - checkError(status, "failed to finish"); - status = clFinish(queue5); + clWaitForEvents(1, &write_event[0]); + clReleaseEvent(write_event[0]); + status = clFinish(queue1); checkError(status, "failed to finish"); - status = clFinish(queue4); + status = clFinish(queue2); checkError(status, "failed to finish"); status = clFinish(queue3); checkError(status, "failed to finish"); - status = clFinish(queue2); + status = clFinish(queue4); checkError(status, "failed to finish"); - status = clFinish(queue1); + status = clFinish(queue5); + checkError(status, "failed to finish"); + status = clFinish(queue6); checkError(status, 
"failed to finish"); - - clWaitForEvents(1, &write_event[0]); - clReleaseEvent(write_event[0]); if( (how_many % 4) == 0){ status = clEnqueueReadBuffer(queue6, d_outData4, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[0]); @@ -833,10 +872,10 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv if (d_transpose4) clReleaseMemObject(d_transpose4); - if(fetch1_kernel) - clReleaseKernel(fetch1_kernel); - if(fetch2_kernel) - clReleaseKernel(fetch2_kernel); + if(fetch_kernel) + clReleaseKernel(fetch_kernel); + if(transpose3D_kernel) + clReleaseKernel(transpose3D_kernel); if(ffta_kernel) clReleaseKernel(ffta_kernel); @@ -848,10 +887,8 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(int N, const float2 *inp, float2 *out, bool inv if(transpose_kernel) clReleaseKernel(transpose_kernel); - if(store1_kernel) - clReleaseKernel(store1_kernel); - if(store2_kernel) - clReleaseKernel(store2_kernel); + if(store_kernel) + clReleaseKernel(store_kernel); fft_time.valid = 1; return fft_time; diff --git a/api/src/fft3d_svm.c b/api/src/fft3d_svm.c index c4bc9a1..b9f3c4b 100644 --- a/api/src/fft3d_svm.c +++ b/api/src/fft3d_svm.c @@ -14,7 +14,6 @@ #include "opencl_utils.h" #include "misc.h" #include "svm.h" -#include "/opt/intelFPGA_pro/19.2.0/hld/board/custom_platform_toolkit/mmd/aocl_mmd.h" #define WR_GLOBALMEM 0 #define RD_GLOBALMEM 1 @@ -22,11 +21,11 @@ /** * \brief compute an out-of-place single precision complex 3D FFT using the DDR for 3D Transpose where the data access between the host and the FPGA is using Shared Virtual Memory (SVM) - * \param N : integer pointer addressing the size of FFT3d + * \param N : unsigned integer denoting the size of FFT3d * \param inp : float2 pointer to input data of size [N * N * N] * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT - * \param interleaving : 1 if using burst interleaved global memory buffers + * \param inv 
: toggle to activate backward FFT + * \param interleaving : toggle to use burst interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ fpga_t fftfpgaf_c2c_3d_ddr_svm(const unsigned N, const float2 *inp, float2 *out, const bool inv, const bool interleaving) { @@ -234,11 +233,11 @@ fpga_t fftfpgaf_c2c_3d_ddr_svm(const unsigned N, const float2 *inp, float2 *out, /** - * \brief compute an batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose and for data transfers between host's main memory and FPGA using Shared Virtual Memory - * \param N : integer pointer addressing the size of FFT3d + * \brief compute a batched out-of-place single precision complex 3D-FFT using the DDR of the FPGA for 3D Transpose and for data transfers between host's main memory and FPGA using Shared Virtual Memory + * \param N : unsigned integer denoting the size of FFT3d * \param inp : float2 pointer to input data of size [N * N * N] * \param out : float2 pointer to output data of size [N * N * N] - * \param inv : int toggle to activate backward FFT + * \param inv : toggle to activate backward FFT * \param how_many : number of batched computations * \return fpga_t : time taken in milliseconds for data transfers and execution */ diff --git a/examples/fft.cpp b/examples/fft.cpp index 7f3ef1c..15476a7 100644 --- a/examples/fft.cpp +++ b/examples/fft.cpp @@ -17,8 +17,7 @@ int main(int argc, char* argv[]){ else platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - bool use_usm = false; - int isInit = fpga_initialize(platform, config.path.data(), use_usm); + int isInit = fpga_initialize(platform, config.path.data(), config.use_usm); if(isInit != 0){ cerr << "FPGA initialization error\n"; return EXIT_FAILURE; @@ -50,12 +49,12 @@ int main(int argc, char* argv[]){ case 3:{ if(config.use_bram) runtime[i] = fftfpgaf_c2c_3d_bram(num, inp, out, inv, burst); - else if(!config.use_bram && (config.batch > 1)) + else 
if(!config.use_bram && (!config.use_usm) && (config.batch > 1)) runtime[i] = fftfpgaf_c2c_3d_ddr_batch(num, inp, out, inv, burst, config.batch); - else if (use_usm == 1){ + else if(config.use_usm){ if(config.batch > 1) runtime[i] = fftfpgaf_c2c_3d_ddr_svm_batch(num, inp, out, inv, config.batch); - else + else runtime[i] = fftfpgaf_c2c_3d_ddr_svm(num, inp, out, inv, burst); break; } diff --git a/kernels/fft3d/fft3d_ddr.cl b/kernels/fft3d/fft3d_ddr.cl index 8bcede3..9e7fd89 100755 --- a/kernels/fft3d/fft3d_ddr.cl +++ b/kernels/fft3d/fft3d_ddr.cl @@ -11,11 +11,15 @@ channel float2 chaninfft3db[POINTS]; channel float2 chaninfft3dc[POINTS]; channel float2 chaninTranspose[POINTS]; -channel float2 chaninTranStore1[POINTS]; -channel float2 chaninTranStore2[POINTS]; +channel float2 chaninTranspose3D[POINTS]; +channel float2 chaninStore[POINTS]; + +#define WR_GLOBALMEM 0 +#define RD_GLOBALMEM 1 +#define BATCH 2 // Kernel that fetches data from global memory -kernel void fetchBitrev1(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict src) { +kernel void fetch(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict src) { unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bitrevA = false; @@ -62,9 +66,6 @@ kernel void fetchBitrev1(__global __attribute__((buffer_location(SVM_HOST_BUFFER } } -/* This single work-item task wraps the FFT engine - * 'inverse' toggles between the direct and the inverse transform - */ kernel void fft3da(int inverse) { /* The FFT engine requires a sliding window for data reordering; data stored @@ -112,7 +113,6 @@ kernel void fft3da(int inverse) { } } -__attribute__((max_global_work_dim(0))) kernel void transpose() { const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; @@ -216,158 +216,157 @@ kernel void fft3db(int inverse) { data = fft_step(data, i % (N / POINTS), fft_delay_elements, inverse, LOGN); if (i >= N / 
POINTS - 1) { - write_channel_intel(chaninTranStore1[0], data.i0); - write_channel_intel(chaninTranStore1[1], data.i1); - write_channel_intel(chaninTranStore1[2], data.i2); - write_channel_intel(chaninTranStore1[3], data.i3); - write_channel_intel(chaninTranStore1[4], data.i4); - write_channel_intel(chaninTranStore1[5], data.i5); - write_channel_intel(chaninTranStore1[6], data.i6); - write_channel_intel(chaninTranStore1[7], data.i7); + write_channel_intel(chaninTranspose3D[0], data.i0); + write_channel_intel(chaninTranspose3D[1], data.i1); + write_channel_intel(chaninTranspose3D[2], data.i2); + write_channel_intel(chaninTranspose3D[3], data.i3); + write_channel_intel(chaninTranspose3D[4], data.i4); + write_channel_intel(chaninTranspose3D[5], data.i5); + write_channel_intel(chaninTranspose3D[6], data.i6); + write_channel_intel(chaninTranspose3D[7], data.i7); } } } } -__attribute__((max_global_work_dim(0))) -kernel void transposeStore1(__global __attribute__((buffer_location(DDR_BUFFER_LOCATION))) volatile float2 * restrict dest) { +kernel void transpose3D( + __global __attribute__((buffer_location(DDR_BUFFER_LOCATION))) float2 * restrict src, + __global __attribute__((buffer_location(DDR_BUFFER_LOCATION))) float2 * restrict dest, + const int mode) { - const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 + const int initial_delay = (1 << (LOGN - LOGPOINTS)); // N / 8 for the bitrev buffers bool is_bufA = false, is_bitrevA = false; + bool is_bufB = false, is_bitrevB = false; + + float2 buf_wr[2][DEPTH][POINTS]; + float2 buf_rd[2][DEPTH][POINTS]; - float2 buf[2][DEPTH][POINTS]; //float2 __attribute__((memory, numbanks(8))) bitrev_in[2][N]; float2 bitrev_in[2][N]; - - int initial_delay = DELAY; // for each of the bitrev buffer - // additional iterations to fill the buffers - for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ - - float2x8 data, data_out; - if (step < ((N * DEPTH) - initial_delay)) { - data.i0 = read_channel_intel(chaninTranStore1[0]); 
- data.i1 = read_channel_intel(chaninTranStore1[1]); - data.i2 = read_channel_intel(chaninTranStore1[2]); - data.i3 = read_channel_intel(chaninTranStore1[3]); - data.i4 = read_channel_intel(chaninTranStore1[4]); - data.i5 = read_channel_intel(chaninTranStore1[5]); - data.i6 = read_channel_intel(chaninTranStore1[6]); - data.i7 = read_channel_intel(chaninTranStore1[7]); - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - // Swap buffers every N*N/8 iterations - // starting from the additional delay of N/8 iterations - is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; - - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; - - unsigned row = step & (DEPTH - 1); - data = bitreverse_in(data, - is_bitrevA ? bitrev_in[0] : bitrev_in[1], - is_bitrevA ? bitrev_in[1] : bitrev_in[0], - row); - - writeBuf(data, - is_bufA ? buf[0] : buf[1], - step, 0); - - data_out = readBuf_store( - is_bufA ? 
buf[1] : buf[0], - step); - - if (step >= (DEPTH)) { - unsigned index = (step - DEPTH) * 8; - - dest[index + 0] = data_out.i0; - dest[index + 1] = data_out.i1; - dest[index + 2] = data_out.i2; - dest[index + 3] = data_out.i3; - dest[index + 4] = data_out.i4; - dest[index + 5] = data_out.i5; - dest[index + 6] = data_out.i6; - dest[index + 7] = data_out.i7; - } - } -} -__attribute__((max_global_work_dim(0))) -kernel void fetchBitrev2(__global __attribute__((buffer_location(DDR_BUFFER_LOCATION))) volatile float2 * restrict src) { - unsigned delay = (1 << (LOGN - LOGPOINTS)); // N / 8 - - bool is_bufA = false, is_bitrevA = false; float2 __attribute__((memory, numbanks(8))) bitrev_out[2][N]; - float2 buf[2][DEPTH][POINTS]; - - // additional iterations to fill the buffers - for(unsigned step = 0; step < (N * DEPTH) + DEPTH + delay; step++){ - // increment z by 1 every N/8 steps until (N*N/ 8) - unsigned start_index = step + delay; - unsigned zdim = (step >> (LOGN - LOGPOINTS)) & (N - 1); - - // increment y by 1 every N*N/8 points until N - unsigned ydim = (step >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); - - // increment by 8 until N / 8 - unsigned xdim = (step * 8) & (N - 1); - - // increment by 1 every N*N*N / 8 steps - unsigned batch_index = (step >> (LOGN + LOGN + LOGN - LOGPOINTS)); - unsigned index = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + // additional iterations to fill the buffers + for(int step = -initial_delay; step < ((N * DEPTH) + DEPTH); step++){ float2x8 data, data_out; - if (step < (N * DEPTH)) { - data.i0 = src[index + 0]; - data.i1 = src[index + 1]; - data.i2 = src[index + 2]; - data.i3 = src[index + 3]; - data.i4 = src[index + 4]; - data.i5 = src[index + 5]; - data.i6 = src[index + 6]; - data.i7 = src[index + 7]; - } else { - data.i0 = data.i1 = data.i2 = data.i3 = - data.i4 = data.i5 = data.i6 = data.i7 = 0; - } - - is_bufA = (( step & (DEPTH - 1)) == 0) ? 
!is_bufA: is_bufA; + float2x8 data_wr, data_wr_out; + if(mode == WR_GLOBALMEM || mode == BATCH){ + if (step < ((N * DEPTH) - initial_delay)) { + data.i0 = read_channel_intel(chaninTranspose3D[0]); + data.i1 = read_channel_intel(chaninTranspose3D[1]); + data.i2 = read_channel_intel(chaninTranspose3D[2]); + data.i3 = read_channel_intel(chaninTranspose3D[3]); + data.i4 = read_channel_intel(chaninTranspose3D[4]); + data.i5 = read_channel_intel(chaninTranspose3D[5]); + data.i6 = read_channel_intel(chaninTranspose3D[6]); + data.i7 = read_channel_intel(chaninTranspose3D[7]); + } else { + data.i0 = data.i1 = data.i2 = data.i3 = + data.i4 = data.i5 = data.i6 = data.i7 = 0; + } - // Swap bitrev buffers every N/8 iterations - is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + // Swap buffers every N*N/8 iterations + // starting from the additional delay of N/8 iterations + is_bufA = (( step & (DEPTH - 1)) == 0) ? !is_bufA: is_bufA; + + // Swap bitrev buffers every N/8 iterations + is_bitrevA = ( (step & ((N / 8) - 1)) == 0) ? !is_bitrevA: is_bitrevA; + + unsigned row = step & (DEPTH - 1); + data = bitreverse_in(data, + is_bitrevA ? bitrev_in[0] : bitrev_in[1], + is_bitrevA ? bitrev_in[1] : bitrev_in[0], + row); + + writeBuf(data, + is_bufA ? buf_wr[0] : buf_wr[1], + step, 0); + + data_out = readBuf_store( + is_bufA ? buf_wr[1] : buf_wr[0], + step); + + if (step >= (DEPTH)) { + unsigned index = (step - DEPTH) * 8; + + dest[index + 0] = data_out.i0; + dest[index + 1] = data_out.i1; + dest[index + 2] = data_out.i2; + dest[index + 3] = data_out.i3; + dest[index + 4] = data_out.i4; + dest[index + 5] = data_out.i5; + dest[index + 6] = data_out.i6; + dest[index + 7] = data_out.i7; + } + } // condition for writing to global memory + if(mode == RD_GLOBALMEM || mode == BATCH){ - writeBuf(data, - is_bufA ? 
buf[0] : buf[1], - step, 0); + unsigned step_rd = step + initial_delay; + // increment z by 1 every N/8 steps until (N*N/ 8) + unsigned start_index = step_rd + initial_delay; + unsigned zdim = (step_rd >> (LOGN - LOGPOINTS)) & (N - 1); - data_out = readBuf_fetch( - is_bufA ? buf[1] : buf[0], - step, 0); + // increment y by 1 every N*N/8 points until N + unsigned ydim = (step_rd >> (LOGN + LOGN - LOGPOINTS)) & (N - 1); - unsigned start_row = step & (DEPTH -1); - data_out = bitreverse_out( - is_bitrevA ? bitrev_out[0] : bitrev_out[1], - is_bitrevA ? bitrev_out[1] : bitrev_out[0], - data_out, start_row); + // increment by 8 until N / 8 + unsigned xdim = (step_rd * 8) & (N - 1); - if (step >= (DEPTH + delay)) { + // increment by 1 every N*N*N / 8 steps + unsigned batch_index = (step_rd >> (LOGN + LOGN + LOGN - LOGPOINTS)); + + unsigned index_wr = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; + + //float2x8 data, data_out; + if (step < (N * DEPTH)) { + data_wr.i0 = src[index_wr + 0]; + data_wr.i1 = src[index_wr + 1]; + data_wr.i2 = src[index_wr + 2]; + data_wr.i3 = src[index_wr + 3]; + data_wr.i4 = src[index_wr + 4]; + data_wr.i5 = src[index_wr + 5]; + data_wr.i6 = src[index_wr + 6]; + data_wr.i7 = src[index_wr + 7]; + } else { + data_wr.i0 = data_wr.i1 = data_wr.i2 = data_wr.i3 = + data_wr.i4 = data_wr.i5 = data_wr.i6 = data_wr.i7 = 0; + } + + is_bufB = (( step_rd & (DEPTH - 1)) == 0) ? !is_bufB: is_bufB; + + // Swap bitrev buffers every N/8 iterations + is_bitrevB = ( (step_rd & ((N / 8) - 1)) == 0) ? !is_bitrevB: is_bitrevB; + + writeBuf(data_wr, + is_bufB ? buf_rd[0] : buf_rd[1], + step_rd, 0); + + data_wr_out = readBuf_fetch( + is_bufB ? buf_rd[1] : buf_rd[0], + step_rd, 0); + + unsigned start_row = step_rd & (DEPTH -1); + data_wr_out = bitreverse_out( + is_bitrevB ? bitrev_out[0] : bitrev_out[1], + is_bitrevB ? 
bitrev_out[1] : bitrev_out[0], + data_wr_out, start_row); + + if (step_rd >= (DEPTH + initial_delay)) { + + write_channel_intel(chaninfft3dc[0], data_wr_out.i0); + write_channel_intel(chaninfft3dc[1], data_wr_out.i1); + write_channel_intel(chaninfft3dc[2], data_wr_out.i2); + write_channel_intel(chaninfft3dc[3], data_wr_out.i3); + write_channel_intel(chaninfft3dc[4], data_wr_out.i4); + write_channel_intel(chaninfft3dc[5], data_wr_out.i5); + write_channel_intel(chaninfft3dc[6], data_wr_out.i6); + write_channel_intel(chaninfft3dc[7], data_wr_out.i7); + } - write_channel_intel(chaninfft3dc[0], data_out.i0); - write_channel_intel(chaninfft3dc[1], data_out.i1); - write_channel_intel(chaninfft3dc[2], data_out.i2); - write_channel_intel(chaninfft3dc[3], data_out.i3); - write_channel_intel(chaninfft3dc[4], data_out.i4); - write_channel_intel(chaninfft3dc[5], data_out.i5); - write_channel_intel(chaninfft3dc[6], data_out.i6); - write_channel_intel(chaninfft3dc[7], data_out.i7); - } + } // condition for reading from global memory } } -/* - * Input and output data in bit-reversed format - */ kernel void fft3dc(int inverse) { /* The FFT engine requires a sliding window for data reordering; data stored @@ -403,21 +402,20 @@ kernel void fft3dc(int inverse) { // Write result to channels if (i >= N / POINTS - 1) { - write_channel_intel(chaninTranStore2[0], data.i0); - write_channel_intel(chaninTranStore2[1], data.i1); - write_channel_intel(chaninTranStore2[2], data.i2); - write_channel_intel(chaninTranStore2[3], data.i3); - write_channel_intel(chaninTranStore2[4], data.i4); - write_channel_intel(chaninTranStore2[5], data.i5); - write_channel_intel(chaninTranStore2[6], data.i6); - write_channel_intel(chaninTranStore2[7], data.i7); + write_channel_intel(chaninStore[0], data.i0); + write_channel_intel(chaninStore[1], data.i1); + write_channel_intel(chaninStore[2], data.i2); + write_channel_intel(chaninStore[3], data.i3); + write_channel_intel(chaninStore[4], data.i4); + 
write_channel_intel(chaninStore[5], data.i5); + write_channel_intel(chaninStore[6], data.i6); + write_channel_intel(chaninStore[7], data.i7); } } } } -__attribute__((max_global_work_dim(0))) -kernel void transposeStore2(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict dest) { +kernel void store(__global __attribute__((buffer_location(SVM_HOST_BUFFER_LOCATION))) volatile float2 * restrict dest) { const int DELAY = (1 << (LOGN - LOGPOINTS)); // N / 8 bool is_bufA = false, is_bitrevA = false; @@ -432,14 +430,14 @@ kernel void transposeStore2(__global __attribute__((buffer_location(SVM_HOST_BUF float2x8 data, data_out; if (step < ((N * DEPTH) - initial_delay)) { - data.i0 = read_channel_intel(chaninTranStore2[0]); - data.i1 = read_channel_intel(chaninTranStore2[1]); - data.i2 = read_channel_intel(chaninTranStore2[2]); - data.i3 = read_channel_intel(chaninTranStore2[3]); - data.i4 = read_channel_intel(chaninTranStore2[4]); - data.i5 = read_channel_intel(chaninTranStore2[5]); - data.i6 = read_channel_intel(chaninTranStore2[6]); - data.i7 = read_channel_intel(chaninTranStore2[7]); + data.i0 = read_channel_intel(chaninStore[0]); + data.i1 = read_channel_intel(chaninStore[1]); + data.i2 = read_channel_intel(chaninStore[2]); + data.i3 = read_channel_intel(chaninStore[3]); + data.i4 = read_channel_intel(chaninStore[4]); + data.i5 = read_channel_intel(chaninStore[5]); + data.i6 = read_channel_intel(chaninStore[6]); + data.i7 = read_channel_intel(chaninStore[7]); } else { data.i0 = data.i1 = data.i2 = data.i3 = data.i4 = data.i5 = data.i6 = data.i7 = 0; @@ -496,4 +494,4 @@ kernel void transposeStore2(__global __attribute__((buffer_location(SVM_HOST_BUF dest[index + 7] = data_out.i7; } } -} \ No newline at end of file +} From f20c744872649adb7b6aa7a4988dc34ee3c06c56 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 8 Oct 2021 12:40:43 +0200 Subject: [PATCH 49/76] updated ci --- .gitlab-ci.yml | 20 ++++++-------------- 1 file 
changed, 6 insertions(+), 14 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a2e326e..bf5234f 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,23 +12,15 @@ build-all: - mkdir -p build && cd build - cmake -DLOG_FFT_SIZE=6 -DCMAKE_BUILD_TYPE=Release .. - make - - make fft1d_emu - - make fft2d_ddr_emu - - make fft2d_bram_opt_emu - - make fft3d_ddr_emu - - make fft3d_bram_emu - - chmod +x bin/fft3d_bram bin/fft3d_ddr bin/fft2d bin/fft1d bin/test_fftfpga + - chmod +x bin/fft artifacts: paths: - - build/bin/emu_64_fft1d/fft1d.aocx - - build/bin/emu_64_fft2d_ddr/fft2d_ddr.aocx - - build/bin/emu_64_fft2d_bram_opt/fft2d_bram_opt.aocx - - build/bin/emu_64_fft3d_ddr/fft3d_ddr.aocx - - build/bin/emu_64_fft3d_bram/fft3d_bram.aocx + - build/bin/p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d_64.aocx + - build/bin/p520_hpc_sg280l/emulation/fft2d_ddr_64_nointer/fft2d_ddr.aocx + - build/bin/p520_hpc_sg280l/emulation/fft3d_ddr_64_nointer/fft3d_ddr.aocx + - build/bin/p520_hpc_sg280l/emulation/fft3d_bram_64_nointer/fft3d_bram.aocx - build/bin/test_fftfpga - - build/bin/fft3d - - build/bin/fft2d - - build/bin/fft1d + - build/bin/fft test-all: stage: test From fe14f166f8082d021f30410358dd411dfb9570fd Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 8 Oct 2021 17:48:28 +0200 Subject: [PATCH 50/76] updated tests --- api/include/fftfpga/fftfpga.h | 12 ++-- api/src/fft1d.c | 12 ++-- api/src/fftfpga.c | 6 +- api/src/fpga_state.h | 1 - api/src/opencl_utils.c | 2 +- examples/helper.cpp | 12 +--- examples/helper.hpp | 4 +- tests/CMakeLists.txt | 15 ++-- tests/test_fft1d_fpga.cpp | 48 ++----------- tests/test_fft2d_fpga.cpp | 77 ++------------------- tests/test_fft3d_fpga.cpp | 125 +++------------------------------- tests/test_fft_setup.cpp | 14 ++-- tests/test_misc.cpp | 43 ------------ tests/test_opencl_utils.cpp | 8 +-- 14 files changed, 59 insertions(+), 320 deletions(-) delete mode 100644 tests/test_misc.cpp diff --git a/api/include/fftfpga/fftfpga.h 
b/api/include/fftfpga/fftfpga.h index 943aa30..31e3935 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -53,7 +53,7 @@ extern "C" { -4 Failed to create program, file not found in path -5 Device does not support required SVM */ -extern int fpga_initialize(const char *platform_name, const char *path, bool use_svm); +extern int fpga_initialize(const char *platform_name, const char *path, const bool use_svm); /** * @brief Release FPGA Resources @@ -65,14 +65,14 @@ extern void fpga_final(); * @param sz : size_t - size to allocate * @return void ptr or NULL */ -extern void* fftfpga_complex_malloc(size_t sz); +extern void* fftfpga_complex_malloc(const size_t sz); /** * @brief Allocate memory of single precision complex floating points * @param sz : size_t : size to allocate * @return void ptr or NULL */ -extern void* fftfpgaf_complex_malloc(size_t sz); +extern void* fftfpgaf_complex_malloc(const size_t sz); /** * @brief compute an out-of-place double precision complex 1D-FFT on the FPGA @@ -83,7 +83,7 @@ extern void* fftfpgaf_complex_malloc(size_t sz); * @param iter : number of iterations of the N point FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, unsigned iter); +extern fpga_t fftfpga_c2c_1d(const unsigned N, const double2 *inp, double2 *out, const bool inv, const unsigned iter); /** * @brief compute an out-of-place single precision complex 1D-FFT on the FPGA @@ -94,7 +94,7 @@ extern fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool * @param iter : number of iterations of the N point FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool inv, unsigned iter); +extern fpga_t fftfpgaf_c2c_1d(const unsigned N, const float2 *inp, float2 *out, const bool inv, const unsigned iter); /** * 
@brief compute an out-of-place single precision complex 1D-FFT on the FPGA @@ -105,7 +105,7 @@ extern fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool i * @param iter : number of iterations of the N point FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_1d_svm(unsigned N, const float2 *inp, float2 *out, bool inv, unsigned batch); +extern fpga_t fftfpgaf_c2c_1d_svm(const unsigned N, const float2 *inp, float2 *out, const bool inv, const unsigned batch); /** * @brief compute an out-of-place single precision complex 2D-FFT using the BRAM of the FPGA diff --git a/api/src/fft1d.c b/api/src/fft1d.c index 7313b47..7c64122 100644 --- a/api/src/fft1d.c +++ b/api/src/fft1d.c @@ -24,7 +24,7 @@ * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, unsigned batch){ +fpga_t fftfpga_c2c_1d(const unsigned N, const double2 *inp, double2 *out, const bool inv, const unsigned batch){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel fetch_kernel = NULL, fft_kernel = NULL; cl_int status = 0; @@ -34,8 +34,6 @@ fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, un return fft_time; } - printf("-- Launching%s 1D FFT of %d batches \n", inv ? 
" inverse":"", batch); - queue_setup(); cl_mem d_inData, d_outData; @@ -143,11 +141,11 @@ fpga_t fftfpga_c2c_1d(unsigned N, const double2 *inp, double2 *out, bool inv, un * \param N : unsigned integer to the number of points in FFT1d * \param inp : float2 pointer to input data of size N * \param out : float2 pointer to output data of size N - * \param inv : true for backward transforms + * \param inv : toggle for backward transforms * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool inv, unsigned batch){ +fpga_t fftfpgaf_c2c_1d(const unsigned N, const float2 *inp, float2 *out, const bool inv, const unsigned batch){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel kernel1 = NULL, kernel2 = NULL; @@ -253,11 +251,11 @@ fpga_t fftfpgaf_c2c_1d(unsigned N, const float2 *inp, float2 *out, bool inv, uns * \param N : unsigned integer to the number of points in 1D FFT * \param inp : float2 pointer to input data of size N * \param out : float2 pointer to output data of size N - * \param inv : int toggle to activate backward FFT + * \param inv : toggle to activate backward FFT * \param batch : number of batched executions of 1D FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_1d_svm(unsigned N, const float2 *inp, float2 *out, bool inv, unsigned batch){ +fpga_t fftfpgaf_c2c_1d_svm(const unsigned N, const float2 *inp, float2 *out, const bool inv, const unsigned batch){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; unsigned num_pts = N * batch; diff --git a/api/src/fftfpga.c b/api/src/fftfpga.c index 489a5f7..bc0ec19 100755 --- a/api/src/fftfpga.c +++ b/api/src/fftfpga.c @@ -34,7 +34,7 @@ bool svm_enabled = false; * @param svm : 1 if svm * @return void ptr or NULL */ -void* fftfpga_complex_malloc(size_t sz){ +void* fftfpga_complex_malloc(const size_t sz){ 
if(sz == 0){ return NULL; } @@ -49,7 +49,7 @@ void* fftfpga_complex_malloc(size_t sz){ * @param svm : 1 if svm * @return void ptr or NULL */ -void* fftfpgaf_complex_malloc(size_t sz){ +void* fftfpgaf_complex_malloc(const size_t sz){ if(sz == 0){ return NULL; @@ -69,7 +69,7 @@ void* fftfpgaf_complex_malloc(size_t sz){ -4 Failed to create program, file not found in path -5 Device does not support required SVM */ -int fpga_initialize(const char *platform_name, const char *path, bool use_svm){ +int fpga_initialize(const char *platform_name, const char *path, const bool use_svm){ cl_int status = 0; printf("-- Initializing FPGA ...\n"); diff --git a/api/src/fpga_state.h b/api/src/fpga_state.h index 9a0bde3..c37bbb3 100644 --- a/api/src/fpga_state.h +++ b/api/src/fpga_state.h @@ -15,7 +15,6 @@ extern cl_command_queue queue4, queue5, queue6; extern cl_command_queue queue7, queue8; extern bool svm_enabled; -//extern int svm_handle; extern void queue_setup(); extern void queue_cleanup(); diff --git a/api/src/opencl_utils.c b/api/src/opencl_utils.c index de2bc8b..72af4e4 100755 --- a/api/src/opencl_utils.c +++ b/api/src/opencl_utils.c @@ -142,7 +142,7 @@ cl_program getProgramWithBinary(cl_context context, cl_device_id *devices, cl_ui char *binary, *binaries[num_devices]; cl_int bin_status, status; - if(num_devices == 0) + if(num_devices == 0 || context == NULL) return NULL; if (!fileExists(path)){ diff --git a/examples/helper.cpp b/examples/helper.cpp index 39f948f..e1ceed3 100644 --- a/examples/helper.cpp +++ b/examples/helper.cpp @@ -31,14 +31,6 @@ void create_data(float2 *inp, const unsigned num){ inp[i].x = (float)((float)rand() / (float)RAND_MAX); inp[i].y = (float)((float)rand() / (float)RAND_MAX); } - - /* - printf("Creating Data\n"); - for(unsigned i = 0; i < num; i++){ - printf("%u: (%f, %f)\n", i, inp[i].x, inp[i].y); - } - printf("\n"); - */ } /** @@ -95,7 +87,7 @@ void parse_args(int argc, char* argv[], CONFIG &config){ } } -void print_config(CONFIG config){ +void 
print_config(const CONFIG config){ printf("\n------------------------------------------\n"); printf("FFT CONFIGURATION: \n"); printf("--------------------------------------------\n"); @@ -119,7 +111,7 @@ void print_config(CONFIG config){ * \param config: struct of program state * \return true if verification passed */ -bool verify_fftwf(float2 *verify, float2 *fpgaout, const CONFIG config){ +bool verify_fftwf(const float2 *verify, float2 *fpgaout, const CONFIG config){ unsigned sz = pow(config.num, config.dim); unsigned total_sz = config.batch * sz; diff --git a/examples/helper.hpp b/examples/helper.hpp index 1fffdd3..0f47725 100644 --- a/examples/helper.hpp +++ b/examples/helper.hpp @@ -20,13 +20,13 @@ struct CONFIG{ void parse_args(int argc, char* argv[], CONFIG &config); -void print_config(CONFIG config); +void print_config(const CONFIG config); double getTimeinMilliSec(); void create_data(float2 *inp, const unsigned num); -bool verify_fftwf(float2 *verify, float2 *fpgaout, const CONFIG config); +bool verify_fftwf(const float2 *verify, float2 *fpgaout, const CONFIG config); void perf_measures(const CONFIG config, fpga_t *runtime); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 674bba2..3f3925d 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,23 +1,22 @@ # Author: Arjun Ramaswami cmake_minimum_required(VERSION 3.10) +project(testfftfpga VERSION 2.0 + DESCRIPTION "Tests for FFT using FPGAs" + LANGUAGES C CXX) add_executable(test_fftfpga - #${CMAKE_CURRENT_SOURCE_DIR}/test_fft_fpga.cpp test_fft_setup.cpp test_fft1d_fpga.cpp test_fft2d_fpga.cpp test_fft3d_fpga.cpp test_opencl_utils.cpp - test_misc.cpp - ${examplesfftfpga_SOURCE_DIR}/common/helper.c - ${examplesfftfpga_SOURCE_DIR}/common/verify_fftw.c ) target_include_directories(test_fftfpga - PUBLIC ${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR} - ${IntelFPGAOpenCL_INCLUDE_DIRS} - ${examplesfftfpga_SOURCE_DIR}/common - ${CMAKE_SOURCE_DIR}/api/src + PUBLIC 
${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR} + ${IntelFPGAOpenCL_INCLUDE_DIRS} + ${examplesfftfpga_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/api/src ) target_link_libraries(test_fftfpga PUBLIC diff --git a/tests/test_fft1d_fpga.cpp b/tests/test_fft1d_fpga.cpp index f5beed1..c7ab436 100644 --- a/tests/test_fft1d_fpga.cpp +++ b/tests/test_fft1d_fpga.cpp @@ -1,28 +1,22 @@ // Author: Arjun Ramaswami #include "gtest/gtest.h" // finds this because gtest is linked -#include -#include -#include +#include +#include extern "C" { #include "CL/opencl.h" #include "fftfpga/fftfpga.h" - #include "helper.h" - #include "verify_fftw.h" - -#ifdef USE_FFTW - #include -#endif } +#include "helper.hpp" /** * \brief fftfpgaf_c2c_1d() */ TEST(fft1dFPGATest, InputValidity){ - const int N = (1 << 6); + const unsigned N = (1 << 6); + const size_t sz = sizeof(float2) * N; - size_t sz = sizeof(float2) * N; float2 *test = (float2*)malloc(sz); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; @@ -41,39 +35,11 @@ TEST(fft1dFPGATest, InputValidity){ free(test); } -TEST(fft1dFPGATest, CorrectnessSp){ - // check correctness of output -#ifdef USE_FFTW - const int logN = 6; - int N = (1 << logN); - - size_t sz = sizeof(float2) * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz); - - // malloc data to input - fftf_create_data(inp, N); - - int isInit= fpga_initialize("Intel(R) FPGA", "emu_64_fft1d/fft1d.aocx", false); - ASSERT_EQ(isInit, 0); - - fpga_t fft_time = fftfpgaf_c2c_1d(64, inp, out, 0, 1); - - bool result = verify_fftwf(out, inp, N, 1, false, 1); - EXPECT_TRUE(result); - - free(inp); - free(out); - - fpga_final(); -#endif -} - /** * \brief fftfpgaf_c2c_1d() */ TEST(fft1dFPGATest, InputValiditySVM){ - const int N = (1 << 6); + const unsigned N = (1 << 6); size_t sz = sizeof(float2) * N; float2 *test = (float2*)malloc(sz); @@ -83,7 +49,7 @@ TEST(fft1dFPGATest, InputValiditySVM){ fft_time = fftfpgaf_c2c_1d_svm(N, test, test, false, 1); 
EXPECT_EQ(fft_time.valid, 0); - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft1d/ff1d.aocx", true); + int isInit = fpga_initialize("Intel(R) FPGA Emulation Platform for OpenCL(TM)", "p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d.aocx", true); ASSERT_EQ(isInit, 0); // null inp ptr input diff --git a/tests/test_fft2d_fpga.cpp b/tests/test_fft2d_fpga.cpp index 721d0be..62cb14f 100644 --- a/tests/test_fft2d_fpga.cpp +++ b/tests/test_fft2d_fpga.cpp @@ -1,25 +1,21 @@ // Author: Arjun Ramaswami -#include "gtest/gtest.h" // finds this because gtest is linked -#include // malloc, free +#include +#include "gtest/gtest.h" #include -#ifdef USE_FFTW - #include -#endif +#include +#include "helper.hpp" extern "C" { #include "CL/opencl.h" #include "fftfpga/fftfpga.h" - #include "helper.h" - #include "verify_fftw.h" - #include } /** * \brief fftfpgaf_c2c_2d_bram() */ TEST(fft2dFPGATest, InputValidityBRAM){ - const int N = 64; + const unsigned N = 64; size_t sz = sizeof(float2) * N * N; float2 *test = (float2*)malloc(sz); @@ -40,40 +36,11 @@ TEST(fft2dFPGATest, InputValidityBRAM){ free(test); } -TEST(fft2dFPGATest, CorrectnessBRAM){ - // check correctness of output -#ifdef USE_FFTW - // malloc data to input - const int N = (1 << 6); - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft2d_bram_opt/fft2d_bram_opt.aocx", false); - EXPECT_EQ(isInit, 0); - - size_t sz = sizeof(float2) * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz); - - fftf_create_data(inp, N * N); - - fft_time = fftfpgaf_c2c_2d_bram(N, inp, out, 0, 0, 1); - - int result = verify_fftwf(out, inp, N, 2, 0, 1); - - EXPECT_EQ(result, 1); - - free(inp); - free(out); - - fpga_final(); -#endif -} - /** * \brief fftfpgaf_c2c_2d_ddr() */ TEST(fft2dFPGATest, InputValidityDDR){ - const int N = 64; + const unsigned N = 64; size_t sz = sizeof(float2) * N * N; float2 *test = (float2*)malloc(sz); @@ -92,36 
+59,4 @@ TEST(fft2dFPGATest, InputValidityDDR){ EXPECT_EQ(fft_time.valid, 0); free(test); -} - -/** - * \brief fftfpgaf_c2c_2d_ddr() - */ -TEST(fftFPGATest, ValidSp2dFFTDDR){ - // check correctness of output -#ifdef USE_FFTW - // malloc data to input - const int N = (1 << 6); - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft2d_ddr/fft2d_ddr.aocx", 0); - ASSERT_EQ(isInit, 0); - - size_t sz = sizeof(float2) * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz); - - fftf_create_data(inp, N * N); - - fft_time = fftfpgaf_c2c_2d_ddr(N, inp, out, 0); - - int result = verify_fftwf(out, inp, N, 2, 0, 1); - - EXPECT_EQ(result, 1); - - free(inp); - free(out); - - fpga_final(); -#endif } \ No newline at end of file diff --git a/tests/test_fft3d_fpga.cpp b/tests/test_fft3d_fpga.cpp index 2970b6b..656d987 100644 --- a/tests/test_fft3d_fpga.cpp +++ b/tests/test_fft3d_fpga.cpp @@ -1,28 +1,22 @@ // Author: Arjun Ramaswami -#include "gtest/gtest.h" // finds this because gtest is linked -#include // malloc, free -#include -#include -#ifdef USE_FFTW - #include -#endif +#include +#include "gtest/gtest.h" +#include +#include "helper.hpp" extern "C" { #include "CL/opencl.h" #include "fftfpga/fftfpga.h" - #include "helper.h" - #include "verify_fftw.h" - #include } /** * \brief fftfpgaf_c2c_3d_bram() */ TEST(fft3dFPGATest, InputValidityBRAM){ - const int N = 64; + const unsigned N = 64; - size_t sz = sizeof(float2) * N * N * N; + const size_t sz = sizeof(float2) * N * N * N; float2 *test = (float2*)malloc(sz); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; @@ -41,42 +35,13 @@ TEST(fft3dFPGATest, InputValidityBRAM){ free(test); } -TEST(fft3dFPGATest, CorrectnessBRAM){ - // check correctness of output -#ifdef USE_FFTW - // malloc data to input - const int N = (1 << 6); - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - - int isInit = fpga_initialize("Intel(R) FPGA", 
"emu_64_fft3d_bram/fft3d_bram.aocx", false); - ASSERT_EQ(isInit, 0); - - size_t sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz); - - fftf_create_data(inp, N * N * N); - - fft_time = fftfpgaf_c2c_3d_bram(N, inp, out, 0, 0); - - int result = verify_fftwf(out, inp, N, 3, 0, 1); - - EXPECT_EQ(result, 1); - - free(inp); - free(out); - - fpga_final(); -#endif -} - /** * \brief fftfpgaf_c2c_3d_ddr() */ TEST(fft3dFPGATest, InputValidityDDR){ - const int N = 64; + const unsigned N = 64; + const size_t sz = sizeof(float2) * N * N * N; - size_t sz = sizeof(float2) * N * N * N; float2 *test = (float2*)malloc(sz); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; @@ -95,45 +60,13 @@ TEST(fft3dFPGATest, InputValidityDDR){ free(test); } -/** - * \brief fftfpgaf_c2c_3d_ddr() - */ -TEST(fftFPGATest, ValidSp3dFFTDDR){ - // check correctness of output -#ifdef USE_FFTW - // malloc data to input - const int N = (1 << 6); - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr/fft3d_ddr.aocx", 0); - ASSERT_EQ(isInit, 0); - - size_t sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz); - - fftf_create_data(inp, N * N * N); - - fft_time = fftfpgaf_c2c_3d_ddr(N, inp, out, 0); - - int result = verify_fftwf(out, inp, N, 3, 0, 1); - - EXPECT_EQ(result, 1); - - free(inp); - free(out); - - fpga_final(); -#endif -} - /** * \brief fftfpgaf_c2c_3d_ddr_svm_batch() */ TEST(fft3dFPGATest, InputValidityDDRSVMBatch){ - const int N = 64; + const unsigned N = 64; + const size_t sz = sizeof(float2) * N * N * N* 2; - size_t sz = sizeof(float2) * N * N * N* 2; float2 *test = (float2*)malloc(sz); fpga_t fft_time = {0.0, 0.0, 0.0, 0}; @@ -159,41 +92,3 @@ TEST(fft3dFPGATest, InputValidityDDRSVMBatch){ free(test); } - - -/** - * \brief fftfpgaf_c2c_3d_ddr_svm_batch() - */ -TEST(fft3dFPGATest, 
ValidSp3dFFTDDRSVMBatch){ - // check correctness of output for a random number of batches -#ifdef USE_FFTW - // malloc data to input - const int N = (1 << 6); - fpga_t fft_time = {0.0, 0.0, 0.0, 0}; - - int isInit = fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_ddr/fft3d_ddr.aocx", true); - ASSERT_EQ(isInit, 0); - - // Random number of batches between 1 and 10 - int how_many = 2; - //int how_many = (rand() % 10) + 1; - size_t sz = sizeof(float2) * N * N * N * how_many; - unsigned num_pts = how_many * N * N * N; - - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(sz); - - fftf_create_data(inp, num_pts); - - fft_time = fftfpgaf_c2c_3d_ddr_svm_batch(N, inp, out, false, how_many); - - bool result = verify_fftwf(out, inp, N, 3, false, how_many); - - EXPECT_TRUE(result); - - free(inp); - free(out); - - fpga_final(); -#endif -} \ No newline at end of file diff --git a/tests/test_fft_setup.cpp b/tests/test_fft_setup.cpp index a8da7a9..9e7b5d7 100644 --- a/tests/test_fft_setup.cpp +++ b/tests/test_fft_setup.cpp @@ -1,12 +1,10 @@ // Author: Arjun Ramaswami -#include "gtest/gtest.h" // finds this because gtest is linked +#include #include -#include -#ifdef USE_FFTW - #include -#endif +#include +#include "gtest/gtest.h" extern "C" { #include "CL/opencl.h" #include "fftfpga/fftfpga.h" @@ -23,10 +21,12 @@ TEST(fftFPGASetupTest, ValidInit){ EXPECT_EQ(fpga_initialize("TEST", "fft1d_emulate.aocx", false), -2); // wrong path argument - EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "TEST", false), -4); + const char* platform_name = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + EXPECT_EQ(fpga_initialize(platform_name, "TEST", false), -4); // right path and platform names - EXPECT_EQ(fpga_initialize("Intel(R) FPGA", "emu_64_fft3d_bram/fft3d_bram.aocx", false), 0); + const char* path = "p520_hpc_sg280l/emulation/fft3d_bram_64_nointer/fft3d_bram.aocx"; + EXPECT_EQ(fpga_initialize(platform_name, path, false), 0); fpga_final(); } 
diff --git a/tests/test_misc.cpp b/tests/test_misc.cpp deleted file mode 100644 index eea37df..0000000 --- a/tests/test_misc.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// Author: Arjun Ramaswami - -#include "gtest/gtest.h" // finds this because gtest is linked -#include - -extern "C" { - #include "CL/opencl.h" - #include "helper.h" -} - -/** - * \brief fftf_create_data - */ -TEST(HelperTest, CreateValidRandomSpData){ - int N = 8; - size_t sz = sizeof(float2) * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(sz); - - // sz 0 - EXPECT_FALSE(fftf_create_data(0, 1)); - - // good input - EXPECT_TRUE(fftf_create_data(inp, N)); - - free(inp); -} - -/** - * \brief fft_create_data - */ -TEST(HelperTest, CreateValidRandomDpData){ - int N = 8; - size_t sz = sizeof(double2) * N; - double2 *inp = (double2*)fftfpga_complex_malloc(sz); - - // sz 0 - EXPECT_FALSE(fft_create_data(0, 1)); - - // good input - EXPECT_TRUE(fft_create_data(inp, N)); - - free(inp); -} \ No newline at end of file diff --git a/tests/test_opencl_utils.cpp b/tests/test_opencl_utils.cpp index fadd29b..e537823 100755 --- a/tests/test_opencl_utils.cpp +++ b/tests/test_opencl_utils.cpp @@ -31,7 +31,7 @@ TEST_F(OpenCLUtilsTest, FindValidPlatform){ EXPECT_EQ(findPlatform("test"), nullptr); // correct platform name - pl_id = findPlatform("Intel(R) FPGA"); + pl_id = findPlatform("Intel(R) FPGA Emulation Platform for OpenCL(TM)"); ASSERT_NE(pl_id, nullptr); } @@ -63,11 +63,8 @@ TEST_F(OpenCLUtilsTest, FindValidDevice){ */ TEST_F(OpenCLUtilsTest, CreateValidProgram){ cl_int status = 0; - const char *path = "emu_64_fft3d_bram/fft3d_bram.aocx"; - + const char* path = "p520_hpc_sg280l/emulation/fft3d_bram_64_nointer/fft3d_bram.aocx"; // bad context - cl_context bad_context; - EXPECT_EQ(getProgramWithBinary(bad_context, &device, 1, path), nullptr); cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status); checkError(status, "Context Fail"); @@ -84,5 +81,6 @@ TEST_F(OpenCLUtilsTest, CreateValidProgram){ // 
right path EXPECT_NE(getProgramWithBinary(context, &device, 1, path), nullptr); + //fpga_final(); } \ No newline at end of file From da403910d13ae723d5b5c6b871d66b71d53fcf0c Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 8 Oct 2021 17:49:10 +0200 Subject: [PATCH 51/76] removed extern modules --- .gitmodules | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100755 .gitmodules diff --git a/.gitmodules b/.gitmodules deleted file mode 100755 index bc2131a..0000000 --- a/.gitmodules +++ /dev/null @@ -1,12 +0,0 @@ -[submodule "extern/gtest"] - path = extern/gtest - url = https://github.com/google/googletest.git -[submodule "extern/hlslib"] - path = extern/hlslib - url = https://github.com/definelicht/hlslib.git -[submodule "extern/argparse"] - path = extern/argparse - url = https://github.com/cofyc/argparse.git -[submodule "extern/findFFTW"] - path = extern/findFFTW - url = https://github.com/egpbos/findFFTW.git From cd03621b043f06654699a400357978869b5bd7c6 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 8 Oct 2021 17:59:38 +0200 Subject: [PATCH 52/76] updated ci --- .gitlab-ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bf5234f..e3a2aea 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,7 +15,8 @@ build-all: - chmod +x bin/fft artifacts: paths: - - build/bin/p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d_64.aocx + - build/bin/p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d.aocx + - build/bin/p520_hpc_sg280l/emulation/fft2d_bram_64_nointer/fft2d_bram.aocx - build/bin/p520_hpc_sg280l/emulation/fft2d_ddr_64_nointer/fft2d_ddr.aocx - build/bin/p520_hpc_sg280l/emulation/fft3d_ddr_64_nointer/fft3d_ddr.aocx - build/bin/p520_hpc_sg280l/emulation/fft3d_bram_64_nointer/fft3d_bram.aocx From 48f2e6193fcb50a10dbae40c64551daf042ebffe Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 8 Oct 2021 18:07:41 +0200 Subject: [PATCH 53/76] updated ci with correct emulation env var 
--- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e3a2aea..e40d365 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -27,6 +27,6 @@ test-all: stage: test script: - cd build/bin/ - - CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1 ./test_fftfpga + - CL_CONFIG_CPU_EMULATE_DEVICES=1 ./test_fftfpga dependencies: - build-all \ No newline at end of file From 8b2639083496e3bedfd80428e1b5d398daa0e692 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 8 Oct 2021 18:14:14 +0200 Subject: [PATCH 54/76] updated ci --- tests/test_fft1d_fpga.cpp | 2 +- tests/test_fft_setup.cpp | 2 +- tests/test_opencl_utils.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_fft1d_fpga.cpp b/tests/test_fft1d_fpga.cpp index c7ab436..f75bf38 100644 --- a/tests/test_fft1d_fpga.cpp +++ b/tests/test_fft1d_fpga.cpp @@ -49,7 +49,7 @@ TEST(fft1dFPGATest, InputValiditySVM){ fft_time = fftfpgaf_c2c_1d_svm(N, test, test, false, 1); EXPECT_EQ(fft_time.valid, 0); - int isInit = fpga_initialize("Intel(R) FPGA Emulation Platform for OpenCL(TM)", "p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d.aocx", true); + int isInit = fpga_initialize("intel(r) fpga sdk for opencl(tm)", "p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d.aocx", true); ASSERT_EQ(isInit, 0); // null inp ptr input diff --git a/tests/test_fft_setup.cpp b/tests/test_fft_setup.cpp index 9e7b5d7..ab1efe9 100644 --- a/tests/test_fft_setup.cpp +++ b/tests/test_fft_setup.cpp @@ -21,7 +21,7 @@ TEST(fftFPGASetupTest, ValidInit){ EXPECT_EQ(fpga_initialize("TEST", "fft1d_emulate.aocx", false), -2); // wrong path argument - const char* platform_name = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; + const char* platform_name = "intel(r) fpga sdk for opencl(tm)"; EXPECT_EQ(fpga_initialize(platform_name, "TEST", false), -4); // right path and platform names diff --git a/tests/test_opencl_utils.cpp b/tests/test_opencl_utils.cpp index e537823..55a58a8 
100755 --- a/tests/test_opencl_utils.cpp +++ b/tests/test_opencl_utils.cpp @@ -31,7 +31,7 @@ TEST_F(OpenCLUtilsTest, FindValidPlatform){ EXPECT_EQ(findPlatform("test"), nullptr); // correct platform name - pl_id = findPlatform("Intel(R) FPGA Emulation Platform for OpenCL(TM)"); + pl_id = findPlatform("intel(r) fpga sdk for opencl(tm)"); ASSERT_NE(pl_id, nullptr); } From 5f795e44d734e7ff22486c61f012c550bfd735c4 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Fri, 8 Oct 2021 18:21:15 +0200 Subject: [PATCH 55/76] Update .gitlab-ci.yml --- .gitlab-ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e40d365..d70594f 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -27,6 +27,8 @@ test-all: stage: test script: - cd build/bin/ + - ldd test_fftfpga + - ldd fftfpga - CL_CONFIG_CPU_EMULATE_DEVICES=1 ./test_fftfpga dependencies: - - build-all \ No newline at end of file + - build-all From fe24e322cd3759386fc50fb2a762054729311dea Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 12 Oct 2021 17:36:56 +0200 Subject: [PATCH 56/76] removed common cl files to common dir --- kernels/{fft1d => common}/fft_8.cl | 0 kernels/{fft1d => common}/twid_radix4_8.cl | 0 kernels/fft1d/fft1d.cl | 2 +- kernels/fft2d/fft2d_bram.cl | 2 +- kernels/fft2d/fft2d_ddr.cl | 2 +- kernels/fft2d/fft_8.cl | 349 -------------------- kernels/fft2d/twid_radix4_8.cl | 90 ------ kernels/fft3d/fft3d_bram.cl | 2 +- kernels/fft3d/fft3d_ddr.cl | 4 +- kernels/fft3d/fft3d_ddr_batch.cl | 2 +- kernels/fft3d/fft3d_ddr_svm.cl | 2 +- kernels/fft3d/fft_8.cl | 351 --------------------- kernels/fft3d/twid_radix4_8.cl | 89 ------ kernels/matrixTranspose/diagonal_bitrev.cl | 2 +- tests/test_fft1d_fpga.cpp | 2 +- tests/test_fft_setup.cpp | 2 +- tests/test_opencl_utils.cpp | 2 +- 17 files changed, 12 insertions(+), 891 deletions(-) rename kernels/{fft1d => common}/fft_8.cl (100%) rename kernels/{fft1d => common}/twid_radix4_8.cl (100%) delete mode 100644 
kernels/fft2d/fft_8.cl delete mode 100644 kernels/fft2d/twid_radix4_8.cl delete mode 100755 kernels/fft3d/fft_8.cl delete mode 100755 kernels/fft3d/twid_radix4_8.cl diff --git a/kernels/fft1d/fft_8.cl b/kernels/common/fft_8.cl similarity index 100% rename from kernels/fft1d/fft_8.cl rename to kernels/common/fft_8.cl diff --git a/kernels/fft1d/twid_radix4_8.cl b/kernels/common/twid_radix4_8.cl similarity index 100% rename from kernels/fft1d/twid_radix4_8.cl rename to kernels/common/twid_radix4_8.cl diff --git a/kernels/fft1d/fft1d.cl b/kernels/fft1d/fft1d.cl index 250e9ae..105c7d0 100644 --- a/kernels/fft1d/fft1d.cl +++ b/kernels/fft1d/fft1d.cl @@ -44,7 +44,7 @@ */ // Include source code for an engine that produces 8 points each step -#include "fft_8.cl" +#include "../common/fft_8.cl" #pragma OPENCL EXTENSION cl_intel_channels : enable diff --git a/kernels/fft2d/fft2d_bram.cl b/kernels/fft2d/fft2d_bram.cl index d35d88e..eb9574d 100644 --- a/kernels/fft2d/fft2d_bram.cl +++ b/kernels/fft2d/fft2d_bram.cl @@ -1,7 +1,7 @@ // Author: Arjun Ramaswami #include "fft_config.h" -#include "fft_8.cl" +#include "../common/fft_8.cl" #include "../matrixTranspose/diagonal_bitrev.cl" #pragma OPENCL EXTENSION cl_intel_channels : enable diff --git a/kernels/fft2d/fft2d_ddr.cl b/kernels/fft2d/fft2d_ddr.cl index e29d0c7..63b6b2b 100644 --- a/kernels/fft2d/fft2d_ddr.cl +++ b/kernels/fft2d/fft2d_ddr.cl @@ -62,7 +62,7 @@ */ // Include source code for an engine that produces 8 points each step -#include "fft_8.cl" +#include "../common/fft_8.cl" // Source the log(size) (log(1k) = 10) from a header shared with the host code #include "fft_config.h" diff --git a/kernels/fft2d/fft_8.cl b/kernels/fft2d/fft_8.cl deleted file mode 100644 index 750322e..0000000 --- a/kernels/fft2d/fft_8.cl +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright (C) 2013-2019 Altera Corporation, San Jose, California, USA. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to -// whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// -// This agreement shall be governed in all respects by the laws of the State of California and -// by the laws of the United States of America. - -// Complex single-precision floating-point radix-4 feedforward FFT / iFFT engine -// -// See Mario Garrido, Jesús Grajal, M. A. Sanchez, Oscar Gustafsson: -// Pipeline Radix-2k Feedforward FFT Architectures. -// IEEE Trans. VLSI Syst. 21(1): 23-32 (2013)) -// -// The log(size) of the transform must be a compile-time constant argument. -// This FFT engine processes 8 points for each invocation. The inputs are eight -// ordered streams while the outputs are in bit reversed order. -// -// The entry point of the engine is the 'fft_step' function. This function -// passes 8 data points through a fixed sequence of processing blocks -// (butterfly, rotation, swap, reorder, multiplications, etc.) 
and produces -// 8 output points towards the overall FFT transform. -// -// The engine is designed to be invoked from a loop in a single work-item task. -// When compiling a single work-item task, the compiler leverages pipeline -// parallelism and overlaps the execution of multiple invocations of this -// function. A new instance can start processing every clock cycle - - -// Includes tabled twiddle factors - storing constants uses fewer resources -// than instantiating 'cos' or 'sin' hardware -#include "twid_radix4_8.cl" - -// Convenience struct representing the 8 data points processed each step -// Each member is a float2 representing a complex number -typedef struct { - float2 i0; - float2 i1; - float2 i2; - float2 i3; - float2 i4; - float2 i5; - float2 i6; - float2 i7; -} float2x8; - -// FFT butterfly building block -float2x8 butterfly(float2x8 data) { - float2x8 res; - res.i0 = data.i0 + data.i1; - res.i1 = data.i0 - data.i1; - res.i2 = data.i2 + data.i3; - res.i3 = data.i2 - data.i3; - res.i4 = data.i4 + data.i5; - res.i5 = data.i4 - data.i5; - res.i6 = data.i6 + data.i7; - res.i7 = data.i6 - data.i7; - return res; -} - -// Swap real and imaginary components in preparation for inverse transform -float2x8 swap_complex(float2x8 data) { - float2x8 res; - res.i0.x = data.i0.y; - res.i0.y = data.i0.x; - res.i1.x = data.i1.y; - res.i1.y = data.i1.x; - res.i2.x = data.i2.y; - res.i2.y = data.i2.x; - res.i3.x = data.i3.y; - res.i3.y = data.i3.x; - res.i4.x = data.i4.y; - res.i4.y = data.i4.x; - res.i5.x = data.i5.y; - res.i5.y = data.i5.x; - res.i6.x = data.i6.y; - res.i6.y = data.i6.x; - res.i7.x = data.i7.y; - res.i7.y = data.i7.x; - return res; -} - -// FFT trivial rotation building block -float2x8 trivial_rotate(float2x8 data) { - float2 tmp = data.i3; - data.i3.x = tmp.y; - data.i3.y = -tmp.x; - tmp = data.i7; - data.i7.x = tmp.y; - data.i7.y = -tmp.x; - return data; -} - -// FFT data swap building block associated with trivial rotations -float2x8 
trivial_swap(float2x8 data) { - float2 tmp = data.i1; - data.i1 = data.i2; - data.i2 = tmp; - tmp = data.i5; - data.i5 = data.i6; - data.i6 = tmp; - return data; -} - -// FFT data swap building block associated with complex rotations -float2x8 swap(float2x8 data) { - float2 tmp = data.i1; - data.i1 = data.i4; - float2 tmp2 = data.i2; - data.i2 = tmp; - tmp = data.i3; - data.i3 = data.i5; - data.i4 = tmp2; - data.i5 = data.i6; - data.i6 = tmp; - return data; -} - -// This function "delays" the input by 'depth' steps -// Input 'data' from invocation N would be returned in invocation N + depth -// The 'shift_reg' sliding window is shifted by 1 element at every invocation -float2 delay(float2 data, const int depth, float2 *shift_reg) { - shift_reg[depth] = data; - return shift_reg[0]; -} - -// FFT data reordering building block. Implements the reordering depicted below -// (for depth = 2). The first valid outputs are in invocation 4 -// Invocation count: 0123... 01234567... -// data.i0 : GECA... ----> DBCA... -// data.i1 : HFDB... ----> HFGE... - -float2x8 reorder_data(float2x8 data, const int depth, float2 * shift_reg, bool toggle) { - // Use disconnected segments of length 'depth + 1' elements starting at - // 'shift_reg' to implement the delay elements. 
At the end of each FFT step, - // the contents of the entire buffer is shifted by 1 element - data.i1 = delay(data.i1, depth, shift_reg); - data.i3 = delay(data.i3, depth, shift_reg + depth + 1); - data.i5 = delay(data.i5, depth, shift_reg + 2 * (depth + 1)); - data.i7 = delay(data.i7, depth, shift_reg + 3 * (depth + 1)); - - if (toggle) { - float2 tmp = data.i0; - data.i0 = data.i1; - data.i1 = tmp; - tmp = data.i2; - data.i2 = data.i3; - data.i3 = tmp; - tmp = data.i4; - data.i4 = data.i5; - data.i5 = tmp; - tmp = data.i6; - data.i6 = data.i7; - data.i7 = tmp; - } - - data.i0 = delay(data.i0, depth, shift_reg + 4 * (depth + 1)); - data.i2 = delay(data.i2, depth, shift_reg + 5 * (depth + 1)); - data.i4 = delay(data.i4, depth, shift_reg + 6 * (depth + 1)); - data.i6 = delay(data.i6, depth, shift_reg + 7 * (depth + 1)); - - return data; -} - -// Implements a complex number multiplication -float2 comp_mult(float2 a, float2 b) { - float2 res; - res.x = a.x * b.x - a.y * b.y; - res.y = a.x * b.y + a.y * b.x; - return res; -} - -// Produces the twiddle factor associated with a processing stream 'stream', -// at a specified 'stage' during a step 'index' of the computation -// -// If there are precomputed twiddle factors for the given FFT size, uses them -// This saves hardware resources, because it avoids evaluating 'cos' and 'sin' -// functions - -float2 twiddle(int index, int stage, int size, int stream) { - float2 twid; - // Coalesces the twiddle tables for indexed access - constant float * twiddles_cos[TWID_STAGES][6] = { - {tc00, tc01, tc02, tc03, tc04, tc05}, - {tc10, tc11, tc12, tc13, tc14, tc15}, - {tc20, tc21, tc22, tc23, tc24, tc25}, - {tc30, tc31, tc32, tc33, tc34, tc35}, - {tc40, tc41, tc42, tc43, tc44, tc45} - }; - constant float * twiddles_sin[TWID_STAGES][6] = { - {ts00, ts01, ts02, ts03, ts04, ts05}, - {ts10, ts11, ts12, ts13, ts14, ts15}, - {ts20, ts21, ts22, ts23, ts24, ts25}, - {ts30, ts31, ts32, ts33, ts34, ts35}, - {ts40, ts41, ts42, ts43, ts44, 
ts45} - }; - - // Use the precomputed twiddle fators, if available - otherwise, compute them - int twid_stage = stage >> 1; - if (size <= (1 << (TWID_STAGES * 2 + 2))) { - twid.x = twiddles_cos[twid_stage][stream] - [index * ((1 << (TWID_STAGES * 2 + 2)) / size)]; - twid.y = twiddles_sin[twid_stage][stream] - [index * ((1 << (TWID_STAGES * 2 + 2)) / size)]; - } else { - // This would generate hardware consuming a large number of resources - // Instantiated only if precomputed twiddle factors are available - const float TWOPI = 2.0f * M_PI_F; - int multiplier; - - // The latter 3 streams will generate the second half of the elements - // In that case phase = 1 - - int phase = 0; - if (stream >= 3) { - stream -= 3; - phase = 1; - } - switch (stream) { - case 0: multiplier = 2; break; - case 1: multiplier = 1; break; - case 2: multiplier = 3; break; - default: multiplier = 0; - } - int pos = (1 << (stage - 1)) * multiplier * ((index + (size / 8) * phase) - & (size / 4 / (1 << (stage - 1)) - 1)); - float theta = -1.0f * TWOPI / size * (pos & (size - 1)); - twid.x = cos(theta); - twid.y = sin(theta); - } - return twid; -} - -// FFT complex rotation building block -float2x8 complex_rotate(float2x8 data, int index, int stage, int size) { - data.i1 = comp_mult(data.i1, twiddle(index, stage, size, 0)); - data.i2 = comp_mult(data.i2, twiddle(index, stage, size, 1)); - data.i3 = comp_mult(data.i3, twiddle(index, stage, size, 2)); - data.i5 = comp_mult(data.i5, twiddle(index, stage, size, 3)); - data.i6 = comp_mult(data.i6, twiddle(index, stage, size, 4)); - data.i7 = comp_mult(data.i7, twiddle(index, stage, size, 5)); - return data; -} - - -// Process 8 input points towards and a FFT/iFFT of size N, N >= 8 -// (in order input, bit reversed output). Apply all input points in N / 8 -// consecutive invocations. Obtain all outputs in N /8 consecutive invocations -// starting with invocation N /8 - 1 (outputs are delayed). 
Multiple back-to-back -// transforms can be executed -// -// 'data' encapsulates 8 complex single-precision floating-point input points -// 'step' specifies the index of the current invocation -// 'fft_delay_elements' is an array representing a sliding window of size N+8*(log(N)-2) -// 'inverse' toggles between the direct and inverse transform -// 'logN' should be a COMPILE TIME constant evaluating log(N) - the constant is -// propagated throughout the code to achieve efficient hardware -// -float2x8 fft_step(float2x8 data, int step, float2 *fft_delay_elements, - bool inverse, const int logN) { - const int size = 1 << logN; - - // Swap real and imaginary components if doing an inverse transform - if (inverse) { - data = swap_complex(data); - } - - // Stage 0 of feed-forward FFT - data = butterfly(data); - data = trivial_rotate(data); - data = trivial_swap(data); - - // Stage 1 - data = butterfly(data); - data = complex_rotate(data, step & (size / 8 - 1), 1, size); - data = swap(data); - - // Next logN - 2 stages alternate two computation patterns - represented as - // a loop to avoid code duplication. Instruct the compiler to fully unroll - // the loop to increase the amount of pipeline parallelism and allow feed - // forward execution - - #pragma unroll - for (int stage = 2; stage < logN - 1; stage++) { - bool complex_stage = stage & 1; // stages 3, 5, ... 
- - // Figure out the index of the element processed at this stage - // Subtract (add modulo size / 8) the delay incurred as data travels - // from one stage to the next - int data_index = (step + ( 1 << (logN - 1 - stage))) & (size / 8 - 1); - - data = butterfly(data); - - if (complex_stage) { - data = complex_rotate(data, data_index, stage, size); - } - - data = swap(data); - - // Compute the delay of this stage - int delay = 1 << (logN - 2 - stage); - - // Reordering multiplexers must toggle every 'delay' steps - bool toggle = data_index & delay; - - // Assign unique sections of the buffer for the set of delay elements at - // each stage - float2 *head_buffer = fft_delay_elements + - size - (1 << (logN - stage + 2)) + 8 * (stage - 2); - - data = reorder_data(data, delay, head_buffer, toggle); - - if (!complex_stage) { - data = trivial_rotate(data); - } - } - - // Stage logN - 1 - data = butterfly(data); - - // Shift the contents of the sliding window. The hardware is capable of - // shifting the entire contents in parallel if the loop is unrolled. More - // important, when unrolling this loop each transfer maps to a trivial - // loop-carried dependency - #pragma unroll - for (int ii = 0; ii < size + 8 * (logN - 2) - 1; ii++) { - fft_delay_elements[ii] = fft_delay_elements[ii + 1]; - } - - if (inverse) { - data = swap_complex(data); - } - - return data; -} - diff --git a/kernels/fft2d/twid_radix4_8.cl b/kernels/fft2d/twid_radix4_8.cl deleted file mode 100644 index 6765cb1..0000000 --- a/kernels/fft2d/twid_radix4_8.cl +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (C) 2013-2019 Altera Corporation, San Jose, California, USA. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to -// whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// -// This agreement shall be governed in all respects by the laws of the State of California and -// by the laws of the United States of America. 
- -// Twiddle factors for radix-4 FFTs -// Precomputed for FFT sizes between 8 and 4096 points - -#define TWID_STAGES 5 - -constant float tc00[512] = {1.0f, 0.9999952912f, 0.9999811649f, 0.9999576211f, 0.9999247193f, 0.9998823404f, 0.9998306036f, 0.9997693896f, 0.9996988177f, 0.9996188283f, 0.9995294213f, 0.9994305968f, 0.9993223548f, 0.9992047548f, 0.9990777373f, 0.9989413023f, 0.9987954497f, 0.9986402392f, 0.9984755516f, 0.9983015656f, 0.9981181026f, 0.9979252815f, 0.997723043f, 0.9975114465f, 0.9972904325f, 0.9970600605f, 0.996820271f, 0.9965711236f, 0.9963126183f, 0.9960446954f, 0.9957674146f, 0.9954807758f, 0.9951847196f, 0.9948793054f, 0.9945645928f, 0.9942404628f, 0.9939069748f, 0.9935641289f, 0.993211925f, 0.9928504229f, 0.9924795628f, 0.9920992851f, 0.9917097688f, 0.9913108349f, 0.9909026623f, 0.9904850721f, 0.9900581837f, 0.9896219969f, 0.9891765118f, 0.9887216687f, 0.988257587f, 0.9877841473f, 0.9873014092f, 0.9868093729f, 0.9863080978f, 0.9857975245f, 0.9852776527f, 0.9847484827f, 0.9842100739f, 0.9836624265f, 0.9831054807f, 0.9825392962f, 0.9819638729f, 0.9813792109f, 0.9807852507f, 0.9801821113f, 0.9795697927f, 0.9789481759f, 0.97831738f, 0.9776773453f, 0.9770281315f, 0.9763697386f, 0.975702107f, 0.9750253558f, 0.974339366f, 0.9736442566f, 0.9729399681f, 0.9722265005f, 0.9715039134f, 0.9707721472f, 0.9700312614f, 0.9692812562f, 0.9685220718f, 0.9677538276f, 0.9669764638f, 0.9661899805f, 0.9653944373f, 0.9645897746f, 0.963776052f, 0.9629532695f, 0.9621214271f, 0.9612804651f, 0.9604305029f, 0.9595715404f, 0.9587034583f, 0.9578264356f, 0.9569403529f, 0.95604527f, 0.9551411867f, 0.9542281032f, 0.9533060193f, 0.9523749948f, 0.9514350295f, 0.950486064f, 0.9495281577f, 0.9485613704f, 0.9475855827f, 0.946600914f, 0.9456073046f, 0.9446048141f, 0.9435934424f, 0.9425731897f, 0.9415440559f, 0.940506041f, 0.9394592047f, 0.9384035468f, 0.9373390079f, 0.9362656474f, 0.9351835251f, 0.9340925217f, 0.932992816f, 0.9318842888f, 0.9307669401f, 0.9296408892f, 
0.9285060763f, 0.9273625016f, 0.9262102246f, 0.9250492454f, 0.9238795042f, 0.9227011204f, 0.9215140343f, 0.9203183055f, 0.9191138744f, 0.9179008007f, 0.9166790843f, 0.9154487252f, 0.9142097831f, 0.9129621983f, 0.9117060304f, 0.9104412794f, 0.909168005f, 0.9078860879f, 0.9065957069f, 0.9052967429f, 0.903989315f, 0.9026733041f, 0.9013488293f, 0.9000158906f, 0.8986744881f, 0.8973245621f, 0.8959662318f, 0.8945994973f, 0.893224299f, 0.8918406963f, 0.8904487491f, 0.8890483379f, 0.8876396418f, 0.8862225413f, 0.8847970963f, 0.8833633661f, 0.8819212914f, 0.8804708719f, 0.8790122271f, 0.8775452971f, 0.8760700822f, 0.8745866418f, 0.8730949759f, 0.8715950847f, 0.8700869679f, 0.8685706854f, 0.867046237f, 0.8655136228f, 0.8639728427f, 0.8624239564f, 0.8608669639f, 0.8593018055f, 0.8577286005f, 0.8561473489f, 0.854557991f, 0.8529605865f, 0.851355195f, 0.8497417569f, 0.8481203318f, 0.8464909196f, 0.84485358f, 0.8432082534f, 0.8415549994f, 0.8398938179f, 0.838224709f, 0.8365477324f, 0.8348628879f, 0.8331701756f, 0.8314695954f, 0.8297612071f, 0.8280450702f, 0.8263210654f, 0.8245893121f, 0.8228498101f, 0.8211025f, 0.8193475008f, 0.8175848126f, 0.8158144355f, 0.8140363097f, 0.8122506142f, 0.81045717f, 0.8086561561f, 0.8068475723f, 0.8050313592f, 0.8032075167f, 0.801376164f, 0.7995372415f, 0.7976908684f, 0.7958369255f, 0.7939754725f, 0.7921065688f, 0.7902302146f, 0.7883464098f, 0.786455214f, 0.7845565677f, 0.7826505899f, 0.7807372212f, 0.7788165212f, 0.7768884897f, 0.7749531269f, 0.7730104327f, 0.7710605264f, 0.7691033483f, 0.7671388984f, 0.7651672363f, 0.7631884217f, 0.761202395f, 0.7592092156f, 0.7572088242f, 0.7552013993f, 0.7531868219f, 0.7511651516f, 0.7491363883f, 0.7471005917f, 0.7450577617f, 0.7430079579f, 0.7409511209f, 0.73888731f, 0.7368165851f, 0.7347388864f, 0.7326542735f, 0.7305627465f, 0.728464365f, 0.726359129f, 0.724247098f, 0.7221282125f, 0.720002532f, 0.7178700566f, 0.7157308459f, 0.7135848403f, 0.7114322186f, 0.7092728019f, 0.7071067691f, 0.7049340606f, 
0.7027547359f, 0.7005687952f, 0.6983762383f, 0.696177125f, 0.6939714551f, 0.6917592287f, 0.689540565f, 0.6873153448f, 0.6850836873f, 0.6828455329f, 0.6806010008f, 0.6783500314f, 0.6760926843f, 0.6738290191f, 0.6715589762f, 0.6692826152f, 0.6669999361f, 0.6647109985f, 0.6624158025f, 0.6601143479f, 0.6578066945f, 0.6554928422f, 0.6531728506f, 0.6508466601f, 0.64851439f, 0.6461760402f, 0.6438315511f, 0.6414810419f, 0.6391244531f, 0.6367618442f, 0.6343932748f, 0.6320187449f, 0.6296382546f, 0.6272518039f, 0.6248595119f, 0.6224612594f, 0.6200572252f, 0.6176472902f, 0.6152315736f, 0.6128100753f, 0.6103827953f, 0.6079497933f, 0.6055110693f, 0.6030666232f, 0.6006164551f, 0.5981606841f, 0.5956993103f, 0.5932322741f, 0.5907596946f, 0.5882815719f, 0.5857978463f, 0.5833086371f, 0.5808139443f, 0.5783137679f, 0.5758081675f, 0.573297143f, 0.5707807541f, 0.5682589412f, 0.5657318234f, 0.5631993413f, 0.5606615543f, 0.5581185222f, 0.5555702448f, 0.5530167222f, 0.5504579544f, 0.5478940606f, 0.5453249812f, 0.5427507758f, 0.5401714444f, 0.5375870466f, 0.534997642f, 0.5324031115f, 0.5298036337f, 0.5271991491f, 0.5245896578f, 0.5219752789f, 0.5193560123f, 0.5167317986f, 0.514102757f, 0.5114688277f, 0.5088301301f, 0.5061866641f, 0.5035383701f, 0.5008853674f, 0.4982276559f, 0.4955652654f, 0.492898196f, 0.4902264774f, 0.4875501692f, 0.4848692417f, 0.4821837842f, 0.479493767f, 0.4767992198f, 0.4741002023f, 0.4713967443f, 0.4686888158f, 0.4659765065f, 0.4632597864f, 0.4605387151f, 0.4578132927f, 0.4550835788f, 0.4523495734f, 0.449611336f, 0.4468688369f, 0.4441221356f, 0.4413712621f, 0.438616246f, 0.4358570874f, 0.433093816f, 0.4303264916f, 0.4275550842f, 0.4247796834f, 0.4220002592f, 0.4192169011f, 0.4164295495f, 0.4136383235f, 0.4108431637f, 0.4080441594f, 0.4052413106f, 0.4024346471f, 0.3996241987f, 0.3968099952f, 0.3939920366f, 0.3911703825f, 0.3883450329f, 0.3855160475f, 0.3826834261f, 0.3798471987f, 0.3770074248f, 0.3741640747f, 0.3713172078f, 0.3684668243f, 0.3656129837f, 0.3627557158f, 
0.3598950505f, 0.3570309579f, 0.3541635275f, 0.3512927592f, 0.3484186828f, 0.3455413282f, 0.3426607251f, 0.3397768736f, 0.336889863f, 0.3339996636f, 0.3311063051f, 0.3282098472f, 0.3253102899f, 0.3224076927f, 0.3195020258f, 0.3165933788f, 0.3136817515f, 0.310767144f, 0.3078496456f, 0.3049292266f, 0.3020059466f, 0.2990798354f, 0.296150893f, 0.2932191491f, 0.2902846634f, 0.2873474658f, 0.2844075263f, 0.2814649343f, 0.27851969f, 0.2755718231f, 0.2726213634f, 0.2696683109f, 0.266712755f, 0.2637546659f, 0.2607941031f, 0.2578310966f, 0.2548656464f, 0.2518978119f, 0.2489276081f, 0.24595505f, 0.2429801822f, 0.2400030196f, 0.2370236069f, 0.234041959f, 0.2310581058f, 0.228072077f, 0.2250839174f, 0.2220936269f, 0.2191012353f, 0.2161068022f, 0.2131103128f, 0.2101118416f, 0.2071113735f, 0.2041089684f, 0.201104641f, 0.1980984062f, 0.1950903237f, 0.1920803934f, 0.1890686601f, 0.1860551536f, 0.1830398887f, 0.1800228953f, 0.1770042181f, 0.1739838719f, 0.1709618866f, 0.167938292f, 0.1649131179f, 0.161886394f, 0.1588581502f, 0.1558284014f, 0.1527971923f, 0.1497645378f, 0.1467304677f, 0.1436950266f, 0.1406582445f, 0.1376201212f, 0.1345807016f, 0.1315400302f, 0.1284981072f, 0.1254549772f, 0.1224106774f, 0.1193652153f, 0.1163186282f, 0.1132709533f, 0.1102222055f, 0.1071724221f, 0.1041216329f, 0.1010698602f, 0.09801714122f, 0.09496349841f, 0.09190895408f, 0.08885355294f, 0.08579730988f, 0.08274026215f, 0.07968243957f, 0.07662386447f, 0.07356456667f, 0.07050457597f, 0.06744392216f, 0.06438262761f, 0.061320737f, 0.05825826526f, 0.05519524589f, 0.05213170499f, 0.04906767607f, 0.04600318149f, 0.0429382585f, 0.03987292573f, 0.03680722415f, 0.0337411724f, 0.030674804f, 0.02760814503f, 0.02454122901f, 0.02147408016f, 0.01840673015f, 0.01533920597f, 0.01227153838f, 0.009203754365f, 0.006135884672f, 0.003067956772f}; -constant float tc03[512] = {6.123234263e-17f, -0.003067956772f, -0.006135884672f, -0.009203754365f, -0.01227153838f, -0.01533920597f, -0.01840673015f, -0.02147408016f, 
-0.02454122901f, -0.02760814503f, -0.030674804f, -0.0337411724f, -0.03680722415f, -0.03987292573f, -0.0429382585f, -0.04600318149f, -0.04906767607f, -0.05213170499f, -0.05519524589f, -0.05825826526f, -0.061320737f, -0.06438262761f, -0.06744392216f, -0.07050457597f, -0.07356456667f, -0.07662386447f, -0.07968243957f, -0.08274026215f, -0.08579730988f, -0.08885355294f, -0.09190895408f, -0.09496349841f, -0.09801714122f, -0.1010698602f, -0.1041216329f, -0.1071724221f, -0.1102222055f, -0.1132709533f, -0.1163186282f, -0.1193652153f, -0.1224106774f, -0.1254549772f, -0.1284981072f, -0.1315400302f, -0.1345807016f, -0.1376201212f, -0.1406582445f, -0.1436950266f, -0.1467304677f, -0.1497645378f, -0.1527971923f, -0.1558284014f, -0.1588581502f, -0.161886394f, -0.1649131179f, -0.167938292f, -0.1709618866f, -0.1739838719f, -0.1770042181f, -0.1800228953f, -0.1830398887f, -0.1860551536f, -0.1890686601f, -0.1920803934f, -0.1950903237f, -0.1980984062f, -0.201104641f, -0.2041089684f, -0.2071113735f, -0.2101118416f, -0.2131103128f, -0.2161068022f, -0.2191012353f, -0.2220936269f, -0.2250839174f, -0.228072077f, -0.2310581058f, -0.234041959f, -0.2370236069f, -0.2400030196f, -0.2429801822f, -0.24595505f, -0.2489276081f, -0.2518978119f, -0.2548656464f, -0.2578310966f, -0.2607941031f, -0.2637546659f, -0.266712755f, -0.2696683109f, -0.2726213634f, -0.2755718231f, -0.27851969f, -0.2814649343f, -0.2844075263f, -0.2873474658f, -0.2902846634f, -0.2932191491f, -0.296150893f, -0.2990798354f, -0.3020059466f, -0.3049292266f, -0.3078496456f, -0.310767144f, -0.3136817515f, -0.3165933788f, -0.3195020258f, -0.3224076927f, -0.3253102899f, -0.3282098472f, -0.3311063051f, -0.3339996636f, -0.336889863f, -0.3397768736f, -0.3426607251f, -0.3455413282f, -0.3484186828f, -0.3512927592f, -0.3541635275f, -0.3570309579f, -0.3598950505f, -0.3627557158f, -0.3656129837f, -0.3684668243f, -0.3713172078f, -0.3741640747f, -0.3770074248f, -0.3798471987f, -0.3826834261f, -0.3855160475f, -0.3883450329f, -0.3911703825f, 
-0.3939920366f, -0.3968099952f, -0.3996241987f, -0.4024346471f, -0.4052413106f, -0.4080441594f, -0.4108431637f, -0.4136383235f, -0.4164295495f, -0.4192169011f, -0.4220002592f, -0.4247796834f, -0.4275550842f, -0.4303264916f, -0.433093816f, -0.4358570874f, -0.438616246f, -0.4413712621f, -0.4441221356f, -0.4468688369f, -0.449611336f, -0.4523495734f, -0.4550835788f, -0.4578132927f, -0.4605387151f, -0.4632597864f, -0.4659765065f, -0.4686888158f, -0.4713967443f, -0.4741002023f, -0.4767992198f, -0.479493767f, -0.4821837842f, -0.4848692417f, -0.4875501692f, -0.4902264774f, -0.492898196f, -0.4955652654f, -0.4982276559f, -0.5008853674f, -0.5035383701f, -0.5061866641f, -0.5088301301f, -0.5114688277f, -0.514102757f, -0.5167317986f, -0.5193560123f, -0.5219752789f, -0.5245896578f, -0.5271991491f, -0.5298036337f, -0.5324031115f, -0.534997642f, -0.5375870466f, -0.5401714444f, -0.5427507758f, -0.5453249812f, -0.5478940606f, -0.5504579544f, -0.5530167222f, -0.5555702448f, -0.5581185222f, -0.5606615543f, -0.5631993413f, -0.5657318234f, -0.5682589412f, -0.5707807541f, -0.573297143f, -0.5758081675f, -0.5783137679f, -0.5808139443f, -0.5833086371f, -0.5857978463f, -0.5882815719f, -0.5907596946f, -0.5932322741f, -0.5956993103f, -0.5981606841f, -0.6006164551f, -0.6030666232f, -0.6055110693f, -0.6079497933f, -0.6103827953f, -0.6128100753f, -0.6152315736f, -0.6176472902f, -0.6200572252f, -0.6224612594f, -0.6248595119f, -0.6272518039f, -0.6296382546f, -0.6320187449f, -0.6343932748f, -0.6367618442f, -0.6391244531f, -0.6414810419f, -0.6438315511f, -0.6461760402f, -0.64851439f, -0.6508466601f, -0.6531728506f, -0.6554928422f, -0.6578066945f, -0.6601143479f, -0.6624158025f, -0.6647109985f, -0.6669999361f, -0.6692826152f, -0.6715589762f, -0.6738290191f, -0.6760926843f, -0.6783500314f, -0.6806010008f, -0.6828455329f, -0.6850836873f, -0.6873153448f, -0.689540565f, -0.6917592287f, -0.6939714551f, -0.696177125f, -0.6983762383f, -0.7005687952f, -0.7027547359f, -0.7049340606f, -0.7071067691f, 
-0.7092728019f, -0.7114322186f, -0.7135848403f, -0.7157308459f, -0.7178700566f, -0.720002532f, -0.7221282125f, -0.724247098f, -0.726359129f, -0.728464365f, -0.7305627465f, -0.7326542735f, -0.7347388864f, -0.7368165851f, -0.73888731f, -0.7409511209f, -0.7430079579f, -0.7450577617f, -0.7471005917f, -0.7491363883f, -0.7511651516f, -0.7531868219f, -0.7552013993f, -0.7572088242f, -0.7592092156f, -0.761202395f, -0.7631884217f, -0.7651672363f, -0.7671388984f, -0.7691033483f, -0.7710605264f, -0.7730104327f, -0.7749531269f, -0.7768884897f, -0.7788165212f, -0.7807372212f, -0.7826505899f, -0.7845565677f, -0.786455214f, -0.7883464098f, -0.7902302146f, -0.7921065688f, -0.7939754725f, -0.7958369255f, -0.7976908684f, -0.7995372415f, -0.801376164f, -0.8032075167f, -0.8050313592f, -0.8068475723f, -0.8086561561f, -0.81045717f, -0.8122506142f, -0.8140363097f, -0.8158144355f, -0.8175848126f, -0.8193475008f, -0.8211025f, -0.8228498101f, -0.8245893121f, -0.8263210654f, -0.8280450702f, -0.8297612071f, -0.8314695954f, -0.8331701756f, -0.8348628879f, -0.8365477324f, -0.838224709f, -0.8398938179f, -0.8415549994f, -0.8432082534f, -0.84485358f, -0.8464909196f, -0.8481203318f, -0.8497417569f, -0.851355195f, -0.8529605865f, -0.854557991f, -0.8561473489f, -0.8577286005f, -0.8593018055f, -0.8608669639f, -0.8624239564f, -0.8639728427f, -0.8655136228f, -0.867046237f, -0.8685706854f, -0.8700869679f, -0.8715950847f, -0.8730949759f, -0.8745866418f, -0.8760700822f, -0.8775452971f, -0.8790122271f, -0.8804708719f, -0.8819212914f, -0.8833633661f, -0.8847970963f, -0.8862225413f, -0.8876396418f, -0.8890483379f, -0.8904487491f, -0.8918406963f, -0.893224299f, -0.8945994973f, -0.8959662318f, -0.8973245621f, -0.8986744881f, -0.9000158906f, -0.9013488293f, -0.9026733041f, -0.903989315f, -0.9052967429f, -0.9065957069f, -0.9078860879f, -0.909168005f, -0.9104412794f, -0.9117060304f, -0.9129621983f, -0.9142097831f, -0.9154487252f, -0.9166790843f, -0.9179008007f, -0.9191138744f, -0.9203183055f, -0.9215140343f, 
-0.9227011204f, -0.9238795042f, -0.9250492454f, -0.9262102246f, -0.9273625016f, -0.9285060763f, -0.9296408892f, -0.9307669401f, -0.9318842888f, -0.932992816f, -0.9340925217f, -0.9351835251f, -0.9362656474f, -0.9373390079f, -0.9384035468f, -0.9394592047f, -0.940506041f, -0.9415440559f, -0.9425731897f, -0.9435934424f, -0.9446048141f, -0.9456073046f, -0.946600914f, -0.9475855827f, -0.9485613704f, -0.9495281577f, -0.950486064f, -0.9514350295f, -0.9523749948f, -0.9533060193f, -0.9542281032f, -0.9551411867f, -0.95604527f, -0.9569403529f, -0.9578264356f, -0.9587034583f, -0.9595715404f, -0.9604305029f, -0.9612804651f, -0.9621214271f, -0.9629532695f, -0.963776052f, -0.9645897746f, -0.9653944373f, -0.9661899805f, -0.9669764638f, -0.9677538276f, -0.9685220718f, -0.9692812562f, -0.9700312614f, -0.9707721472f, -0.9715039134f, -0.9722265005f, -0.9729399681f, -0.9736442566f, -0.974339366f, -0.9750253558f, -0.975702107f, -0.9763697386f, -0.9770281315f, -0.9776773453f, -0.97831738f, -0.9789481759f, -0.9795697927f, -0.9801821113f, -0.9807852507f, -0.9813792109f, -0.9819638729f, -0.9825392962f, -0.9831054807f, -0.9836624265f, -0.9842100739f, -0.9847484827f, -0.9852776527f, -0.9857975245f, -0.9863080978f, -0.9868093729f, -0.9873014092f, -0.9877841473f, -0.988257587f, -0.9887216687f, -0.9891765118f, -0.9896219969f, -0.9900581837f, -0.9904850721f, -0.9909026623f, -0.9913108349f, -0.9917097688f, -0.9920992851f, -0.9924795628f, -0.9928504229f, -0.993211925f, -0.9935641289f, -0.9939069748f, -0.9942404628f, -0.9945645928f, -0.9948793054f, -0.9951847196f, -0.9954807758f, -0.9957674146f, -0.9960446954f, -0.9963126183f, -0.9965711236f, -0.996820271f, -0.9970600605f, -0.9972904325f, -0.9975114465f, -0.997723043f, -0.9979252815f, -0.9981181026f, -0.9983015656f, -0.9984755516f, -0.9986402392f, -0.9987954497f, -0.9989413023f, -0.9990777373f, -0.9992047548f, -0.9993223548f, -0.9994305968f, -0.9995294213f, -0.9996188283f, -0.9996988177f, -0.9997693896f, -0.9998306036f, -0.9998823404f, 
-0.9999247193f, -0.9999576211f, -0.9999811649f, -0.9999952912f}; -constant float tc01[512] = {1.0f, 0.9999988079f, 0.9999952912f, 0.9999893904f, 0.9999811649f, 0.9999706149f, 0.9999576211f, 0.9999423623f, 0.9999247193f, 0.9999046922f, 0.9998823404f, 0.9998576641f, 0.9998306036f, 0.9998011589f, 0.9997693896f, 0.9997352958f, 0.9996988177f, 0.9996600151f, 0.9996188283f, 0.9995753169f, 0.9995294213f, 0.9994812012f, 0.9994305968f, 0.9993776679f, 0.9993223548f, 0.9992647767f, 0.9992047548f, 0.9991424084f, 0.9990777373f, 0.9990106821f, 0.9989413023f, 0.9988695383f, 0.9987954497f, 0.9987190366f, 0.9986402392f, 0.9985590577f, 0.9984755516f, 0.9983897209f, 0.9983015656f, 0.9982110262f, 0.9981181026f, 0.9980228543f, 0.9979252815f, 0.9978253245f, 0.997723043f, 0.9976184368f, 0.9975114465f, 0.9974021316f, 0.9972904325f, 0.9971764088f, 0.9970600605f, 0.996941328f, 0.996820271f, 0.9966968894f, 0.9965711236f, 0.9964430332f, 0.9963126183f, 0.9961798191f, 0.9960446954f, 0.9959072471f, 0.9957674146f, 0.9956252575f, 0.9954807758f, 0.99533391f, 0.9951847196f, 0.9950332046f, 0.9948793054f, 0.9947231412f, 0.9945645928f, 0.9944036603f, 0.9942404628f, 0.9940748811f, 0.9939069748f, 0.9937367439f, 0.9935641289f, 0.9933891892f, 0.993211925f, 0.9930323362f, 0.9928504229f, 0.9926661253f, 0.9924795628f, 0.992290616f, 0.9920992851f, 0.9919056892f, 0.9917097688f, 0.9915114641f, 0.9913108349f, 0.9911079407f, 0.9909026623f, 0.9906949997f, 0.9904850721f, 0.99027282f, 0.9900581837f, 0.9898412824f, 0.9896219969f, 0.9894004464f, 0.9891765118f, 0.9889502525f, 0.9887216687f, 0.9884908199f, 0.988257587f, 0.9880220294f, 0.9877841473f, 0.9875439405f, 0.9873014092f, 0.9870565534f, 0.9868093729f, 0.9865599275f, 0.9863080978f, 0.9860539436f, 0.9857975245f, 0.9855387211f, 0.9852776527f, 0.9850142598f, 0.9847484827f, 0.9844804406f, 0.9842100739f, 0.9839374423f, 0.9836624265f, 0.9833850861f, 0.9831054807f, 0.9828235507f, 0.9825392962f, 0.982252717f, 0.9819638729f, 0.9816727042f, 0.9813792109f, 0.9810833931f, 
0.9807852507f, 0.9804848433f, 0.9801821113f, 0.9798771143f, 0.9795697927f, 0.9792601466f, 0.9789481759f, 0.9786339402f, 0.97831738f, 0.9779984951f, 0.9776773453f, 0.9773538709f, 0.9770281315f, 0.9767000675f, 0.9763697386f, 0.9760370851f, 0.975702107f, 0.9753648639f, 0.9750253558f, 0.9746835232f, 0.974339366f, 0.9739929438f, 0.9736442566f, 0.9732932448f, 0.9729399681f, 0.9725843668f, 0.9722265005f, 0.9718663096f, 0.9715039134f, 0.971139133f, 0.9707721472f, 0.9704028368f, 0.9700312614f, 0.9696573615f, 0.9692812562f, 0.9689028263f, 0.9685220718f, 0.968139112f, 0.9677538276f, 0.9673662782f, 0.9669764638f, 0.9665843844f, 0.9661899805f, 0.9657933712f, 0.9653944373f, 0.9649932384f, 0.9645897746f, 0.9641840458f, 0.963776052f, 0.9633657932f, 0.9629532695f, 0.9625384808f, 0.9621214271f, 0.9617020488f, 0.9612804651f, 0.9608566165f, 0.9604305029f, 0.9600021243f, 0.9595715404f, 0.9591386318f, 0.9587034583f, 0.9582660794f, 0.9578264356f, 0.9573845267f, 0.9569403529f, 0.9564939141f, 0.95604527f, 0.9555943608f, 0.9551411867f, 0.9546857476f, 0.9542281032f, 0.9537681937f, 0.9533060193f, 0.9528416395f, 0.9523749948f, 0.9519061446f, 0.9514350295f, 0.9509616494f, 0.950486064f, 0.9500082731f, 0.9495281577f, 0.9490458965f, 0.9485613704f, 0.9480745792f, 0.9475855827f, 0.9470943809f, 0.946600914f, 0.9461052418f, 0.9456073046f, 0.9451072216f, 0.9446048141f, 0.9441002607f, 0.9435934424f, 0.9430844188f, 0.9425731897f, 0.9420597553f, 0.9415440559f, 0.9410261512f, 0.940506041f, 0.9399837255f, 0.9394592047f, 0.9389324784f, 0.9384035468f, 0.9378723502f, 0.9373390079f, 0.9368034601f, 0.9362656474f, 0.9357256889f, 0.9351835251f, 0.9346391559f, 0.9340925217f, 0.9335438013f, 0.932992816f, 0.9324396253f, 0.9318842888f, 0.9313266873f, 0.9307669401f, 0.9302050471f, 0.9296408892f, 0.9290745854f, 0.9285060763f, 0.9279354215f, 0.9273625016f, 0.9267874956f, 0.9262102246f, 0.9256308079f, 0.9250492454f, 0.9244654775f, 0.9238795042f, 0.9232914448f, 0.9227011204f, 0.9221086502f, 0.9215140343f, 0.920917213f, 
0.9203183055f, 0.919717133f, 0.9191138744f, 0.9185084105f, 0.9179008007f, 0.9172909856f, 0.9166790843f, 0.9160649776f, 0.9154487252f, 0.914830327f, 0.9142097831f, 0.9135870337f, 0.9129621983f, 0.9123351574f, 0.9117060304f, 0.9110747576f, 0.9104412794f, 0.9098057151f, 0.909168005f, 0.9085280895f, 0.9078860879f, 0.9072420001f, 0.9065957069f, 0.905947268f, 0.9052967429f, 0.9046440721f, 0.903989315f, 0.9033323526f, 0.9026733041f, 0.9020121694f, 0.9013488293f, 0.900683403f, 0.9000158906f, 0.8993462324f, 0.8986744881f, 0.898000598f, 0.8973245621f, 0.8966464996f, 0.8959662318f, 0.8952839375f, 0.8945994973f, 0.893912971f, 0.893224299f, 0.8925335407f, 0.8918406963f, 0.8911457658f, 0.8904487491f, 0.8897495866f, 0.8890483379f, 0.8883450627f, 0.8876396418f, 0.8869321346f, 0.8862225413f, 0.8855108619f, 0.8847970963f, 0.8840812445f, 0.8833633661f, 0.882643342f, 0.8819212914f, 0.8811970949f, 0.8804708719f, 0.8797426224f, 0.8790122271f, 0.8782798052f, 0.8775452971f, 0.8768087029f, 0.8760700822f, 0.8753293753f, 0.8745866418f, 0.8738418221f, 0.8730949759f, 0.8723460436f, 0.8715950847f, 0.8708420396f, 0.8700869679f, 0.8693298697f, 0.8685706854f, 0.8678094745f, 0.867046237f, 0.866280973f, 0.8655136228f, 0.864744246f, 0.8639728427f, 0.8631994128f, 0.8624239564f, 0.8616464734f, 0.8608669639f, 0.8600853682f, 0.8593018055f, 0.8585162163f, 0.8577286005f, 0.8569389582f, 0.8561473489f, 0.8553536534f, 0.854557991f, 0.8537603021f, 0.8529605865f, 0.8521589041f, 0.851355195f, 0.8505494595f, 0.8497417569f, 0.8489320278f, 0.8481203318f, 0.8473066092f, 0.8464909196f, 0.8456732631f, 0.84485358f, 0.8440318704f, 0.8432082534f, 0.8423826098f, 0.8415549994f, 0.8407253623f, 0.8398938179f, 0.8390602469f, 0.838224709f, 0.8373872042f, 0.8365477324f, 0.8357062936f, 0.8348628879f, 0.8340175152f, 0.8331701756f, 0.832320869f, 0.8314695954f, 0.8306164145f, 0.8297612071f, 0.8289040923f, 0.8280450702f, 0.8271840215f, 0.8263210654f, 0.8254561424f, 0.8245893121f, 0.8237205148f, 0.8228498101f, 0.8219771385f, 
0.8211025f, 0.8202259541f, 0.8193475008f, 0.8184671402f, 0.8175848126f, 0.8167005777f, 0.8158144355f, 0.8149263263f, 0.8140363097f, 0.8131443858f, 0.8122506142f, 0.8113548756f, 0.81045717f, 0.8095576167f, 0.8086561561f, 0.8077528477f, 0.8068475723f, 0.8059403896f, 0.8050313592f, 0.8041203618f, 0.8032075167f, 0.8022928238f, 0.801376164f, 0.8004576564f, 0.7995372415f, 0.7986149788f, 0.7976908684f, 0.796764791f, 0.7958369255f, 0.7949071527f, 0.7939754725f, 0.7930419445f, 0.7921065688f, 0.7911693454f, 0.7902302146f, 0.7892892361f, 0.7883464098f, 0.7874017358f, 0.786455214f, 0.7855068445f, 0.7845565677f, 0.7836045027f, 0.7826505899f, 0.7816948295f, 0.7807372212f, 0.7797777653f, 0.7788165212f, 0.7778534293f, 0.7768884897f, 0.7759217024f, 0.7749531269f, 0.7739827037f, 0.7730104327f, 0.7720363736f, 0.7710605264f, 0.7700828314f, 0.7691033483f, 0.7681220174f, 0.7671388984f, 0.7661539912f, 0.7651672363f, 0.7641787529f, 0.7631884217f, 0.7621963024f, 0.761202395f, 0.7602066994f, 0.7592092156f, 0.7582098842f, 0.7572088242f, 0.756205976f, 0.7552013993f, 0.7541949749f, 0.7531868219f, 0.7521768212f, 0.7511651516f, 0.7501516342f, 0.7491363883f, 0.7481193542f, 0.7471005917f, 0.7460801005f, 0.7450577617f, 0.7440337539f, 0.7430079579f, 0.7419804335f, 0.7409511209f, 0.7399200797f, 0.73888731f, 0.7378528118f, 0.7368165851f, 0.7357785702f, 0.7347388864f, 0.7336974144f, 0.7326542735f, 0.7316094041f, 0.7305627465f, 0.72951442f, 0.728464365f, 0.727412641f, 0.726359129f, 0.7253039479f, 0.724247098f, 0.7231884599f, 0.7221282125f, 0.7210661769f, 0.720002532f, 0.718937099f, 0.7178700566f, 0.7168012857f, 0.7157308459f, 0.7146586776f, 0.7135848403f, 0.7125093937f, 0.7114322186f, 0.7103533745f, 0.7092728019f, 0.7081906199f}; -constant float tc04[512] = {0.7071067691f, 0.7060212493f, 0.7049340606f, 0.7038452625f, 0.7027547359f, 0.7016626f, 0.7005687952f, 0.6994733214f, 0.6983762383f, 0.6972774863f, 0.696177125f, 0.6950750947f, 0.6939714551f, 0.6928661466f, 0.6917592287f, 0.6906507015f, 0.689540565f, 
0.6884287596f, 0.6873153448f, 0.6862003207f, 0.6850836873f, 0.683965385f, 0.6828455329f, 0.6817240715f, 0.6806010008f, 0.6794763207f, 0.6783500314f, 0.6772221923f, 0.6760926843f, 0.6749616265f, 0.6738290191f, 0.6726947427f, 0.6715589762f, 0.6704215407f, 0.6692826152f, 0.6681420207f, 0.6669999361f, 0.6658562422f, 0.6647109985f, 0.6635641456f, 0.6624158025f, 0.6612658501f, 0.6601143479f, 0.6589612961f, 0.6578066945f, 0.6566505432f, 0.6554928422f, 0.6543335915f, 0.6531728506f, 0.65201056f, 0.6508466601f, 0.6496813297f, 0.64851439f, 0.6473459601f, 0.6461760402f, 0.6450045109f, 0.6438315511f, 0.6426570415f, 0.6414810419f, 0.6403034925f, 0.6391244531f, 0.6379439235f, 0.6367618442f, 0.6355783343f, 0.6343932748f, 0.6332067847f, 0.6320187449f, 0.630829215f, 0.6296382546f, 0.6284457445f, 0.6272518039f, 0.6260563731f, 0.6248595119f, 0.6236611009f, 0.6224612594f, 0.6212599874f, 0.6200572252f, 0.618852973f, 0.6176472902f, 0.616440177f, 0.6152315736f, 0.6140215397f, 0.6128100753f, 0.6115971804f, 0.6103827953f, 0.6091670394f, 0.6079497933f, 0.6067311168f, 0.6055110693f, 0.6042895317f, 0.6030666232f, 0.6018422246f, 0.6006164551f, 0.5993893147f, 0.5981606841f, 0.5969306827f, 0.5956993103f, 0.5944665074f, 0.5932322741f, 0.5919966698f, 0.5907596946f, 0.5895212889f, 0.5882815719f, 0.5870403647f, 0.5857978463f, 0.584553957f, 0.5833086371f, 0.582062006f, 0.5808139443f, 0.5795645714f, 0.5783137679f, 0.5770616531f, 0.5758081675f, 0.5745533705f, 0.573297143f, 0.5720396042f, 0.5707807541f, 0.5695205331f, 0.5682589412f, 0.566996038f, 0.5657318234f, 0.564466238f, 0.5631993413f, 0.5619311333f, 0.5606615543f, 0.5593907237f, 0.5581185222f, 0.5568450093f, 0.5555702448f, 0.5542941093f, 0.5530167222f, 0.5517379642f, 0.5504579544f, 0.5491766334f, 0.5478940606f, 0.5466101766f, 0.5453249812f, 0.5440385342f, 0.5427507758f, 0.5414617658f, 0.5401714444f, 0.538879931f, 0.5375870466f, 0.5362929702f, 0.534997642f, 0.5337010026f, 0.5324031115f, 0.5311040282f, 0.5298036337f, 0.5285019875f, 0.5271991491f, 
0.5258949995f, 0.5245896578f, 0.523283124f, 0.5219752789f, 0.5206662416f, 0.5193560123f, 0.5180445313f, 0.5167317986f, 0.5154178739f, 0.514102757f, 0.5127863884f, 0.5114688277f, 0.510150075f, 0.5088301301f, 0.5075089931f, 0.5061866641f, 0.5048630834f, 0.5035383701f, 0.5022124648f, 0.5008853674f, 0.4995571077f, 0.4982276559f, 0.4968970418f, 0.4955652654f, 0.4942322969f, 0.492898196f, 0.4915629029f, 0.4902264774f, 0.4888888896f, 0.4875501692f, 0.4862102866f, 0.4848692417f, 0.4835270643f, 0.4821837842f, 0.4808393419f, 0.479493767f, 0.4781470597f, 0.4767992198f, 0.4754502773f, 0.4741002023f, 0.4727490246f, 0.4713967443f, 0.4700433314f, 0.4686888158f, 0.4673331976f, 0.4659765065f, 0.4646186829f, 0.4632597864f, 0.4618997872f, 0.4605387151f, 0.4591765404f, 0.4578132927f, 0.4564489722f, 0.4550835788f, 0.4537171125f, 0.4523495734f, 0.4509809911f, 0.449611336f, 0.448240608f, 0.4468688369f, 0.4454960227f, 0.4441221356f, 0.4427472353f, 0.4413712621f, 0.4399942756f, 0.438616246f, 0.4372371733f, 0.4358570874f, 0.4344759583f, 0.433093816f, 0.4317106605f, 0.4303264916f, 0.4289412796f, 0.4275550842f, 0.4261678755f, 0.4247796834f, 0.4233904779f, 0.4220002592f, 0.4206090868f, 0.4192169011f, 0.4178237021f, 0.4164295495f, 0.4150344133f, 0.4136383235f, 0.4122412205f, 0.4108431637f, 0.4094441533f, 0.4080441594f, 0.4066432118f, 0.4052413106f, 0.4038384557f, 0.4024346471f, 0.4010298848f, 0.3996241987f, 0.3982175589f, 0.3968099952f, 0.3954014778f, 0.3939920366f, 0.3925816715f, 0.3911703825f, 0.3897581697f, 0.3883450329f, 0.3869310021f, 0.3855160475f, 0.3841001987f, 0.3826834261f, 0.3812657595f, 0.3798471987f, 0.3784277439f, 0.3770074248f, 0.3755861819f, 0.3741640747f, 0.3727410734f, 0.3713172078f, 0.3698924482f, 0.3684668243f, 0.3670403361f, 0.3656129837f, 0.3641847968f, 0.3627557158f, 0.3613258004f, 0.3598950505f, 0.3584634066f, 0.3570309579f, 0.3555976748f, 0.3541635275f, 0.3527285457f, 0.3512927592f, 0.3498561382f, 0.3484186828f, 0.3469804227f, 0.3455413282f, 0.344101429f, 0.3426607251f, 
0.3412192166f, 0.3397768736f, 0.3383337557f, 0.336889863f, 0.3354451358f, 0.3339996636f, 0.3325533569f, 0.3311063051f, 0.3296584487f, 0.3282098472f, 0.3267604411f, 0.3253102899f, 0.3238593638f, 0.3224076927f, 0.3209552467f, 0.3195020258f, 0.3180480897f, 0.3165933788f, 0.3151379228f, 0.3136817515f, 0.3122248054f, 0.310767144f, 0.3093087673f, 0.3078496456f, 0.3063898087f, 0.3049292266f, 0.3034679592f, 0.3020059466f, 0.3005432487f, 0.2990798354f, 0.2976157069f, 0.296150893f, 0.2946853638f, 0.2932191491f, 0.291752249f, 0.2902846634f, 0.2888164222f, 0.2873474658f, 0.2858778238f, 0.2844075263f, 0.282936573f, 0.2814649343f, 0.27999264f, 0.27851969f, 0.2770460844f, 0.2755718231f, 0.2740969062f, 0.2726213634f, 0.271145165f, 0.2696683109f, 0.2681908607f, 0.266712755f, 0.2652340233f, 0.2637546659f, 0.2622747123f, 0.2607941031f, 0.2593129277f, 0.2578310966f, 0.2563486695f, 0.2548656464f, 0.2533820271f, 0.2518978119f, 0.2504130006f, 0.2489276081f, 0.2474416196f, 0.24595505f, 0.2444678992f, 0.2429801822f, 0.241491884f, 0.2400030196f, 0.2385135889f, 0.2370236069f, 0.2355330586f, 0.234041959f, 0.2325503081f, 0.2310581058f, 0.2295653671f, 0.228072077f, 0.2265782654f, 0.2250839174f, 0.2235890329f, 0.2220936269f, 0.2205976844f, 0.2191012353f, 0.2176042795f, 0.2161068022f, 0.2146088183f, 0.2131103128f, 0.2116113305f, 0.2101118416f, 0.208611846f, 0.2071113735f, 0.2056104094f, 0.2041089684f, 0.2026070356f, 0.201104641f, 0.1996017545f, 0.1980984062f, 0.1965945959f, 0.1950903237f, 0.1935855895f, 0.1920803934f, 0.1905747503f, 0.1890686601f, 0.1875621229f, 0.1860551536f, 0.1845477372f, 0.1830398887f, 0.1815316081f, 0.1800228953f, 0.1785137653f, 0.1770042181f, 0.1754942536f, 0.1739838719f, 0.1724730879f, 0.1709618866f, 0.169450298f, 0.167938292f, 0.1664258987f, 0.1649131179f, 0.1633999497f, 0.161886394f, 0.1603724509f, 0.1588581502f, 0.1573434621f, 0.1558284014f, 0.1543129683f, 0.1527971923f, 0.1512810439f, 0.1497645378f, 0.1482476741f, 0.1467304677f, 0.1452129185f, 0.1436950266f, 
0.1421768069f, 0.1406582445f, 0.1391393393f, 0.1376201212f, 0.1361005753f, 0.1345807016f, 0.1330605298f, 0.1315400302f, 0.1300192177f, 0.1284981072f, 0.1269766986f, 0.1254549772f, 0.1239329726f, 0.1224106774f, 0.1208880842f, 0.1193652153f, 0.1178420633f, 0.1163186282f, 0.1147949249f, 0.1132709533f, 0.1117467135f, 0.1102222055f, 0.1086974442f, 0.1071724221f, 0.1056471542f, 0.1041216329f, 0.1025958657f, 0.1010698602f, 0.09954361618f, 0.09801714122f, 0.09649042785f, 0.09496349841f, 0.09343633801f, 0.09190895408f, 0.09038136154f, 0.08885355294f, 0.08732553571f, 0.08579730988f, 0.08426889032f, 0.08274026215f, 0.08121144772f, 0.07968243957f, 0.07815324515f, 0.07662386447f, 0.07509429753f, 0.07356456667f, 0.07203464955f, 0.07050457597f, 0.06897433102f, 0.06744392216f, 0.06591334939f, 0.06438262761f, 0.06285175681f, 0.061320737f, 0.05978957191f, 0.05825826526f, 0.05672682077f, 0.05519524589f, 0.05366353691f, 0.05213170499f, 0.05059975013f, 0.04906767607f, 0.04753548279f, 0.04600318149f, 0.04447077215f, 0.0429382585f, 0.04140564054f, 0.03987292573f, 0.03834012151f, 0.03680722415f, 0.03527423739f, 0.0337411724f, 0.03220802546f, 0.030674804f, 0.02914150804f, 0.02760814503f, 0.02607471868f, 0.02454122901f, 0.02300768159f, 0.02147408016f, 0.01994042844f, 0.01840673015f, 0.01687298715f, 0.01533920597f, 0.01380538847f, 0.01227153838f, 0.01073765941f, 0.009203754365f, 0.007669828832f, 0.006135884672f, 0.004601926077f, 0.003067956772f, 0.001533980132f}; -constant float tc02[512] = {1.0f, 0.9999893904f, 0.9999576211f, 0.9999046922f, 0.9998306036f, 0.9997352958f, 0.9996188283f, 0.9994812012f, 0.9993223548f, 0.9991424084f, 0.9989413023f, 0.9987190366f, 0.9984755516f, 0.9982110262f, 0.9979252815f, 0.9976184368f, 0.9972904325f, 0.996941328f, 0.9965711236f, 0.9961798191f, 0.9957674146f, 0.99533391f, 0.9948793054f, 0.9944036603f, 0.9939069748f, 0.9933891892f, 0.9928504229f, 0.992290616f, 0.9917097688f, 0.9911079407f, 0.9904850721f, 0.9898412824f, 0.9891765118f, 0.9884908199f, 
0.9877841473f, 0.9870565534f, 0.9863080978f, 0.9855387211f, 0.9847484827f, 0.9839374423f, 0.9831054807f, 0.982252717f, 0.9813792109f, 0.9804848433f, 0.9795697927f, 0.9786339402f, 0.9776773453f, 0.9767000675f, 0.975702107f, 0.9746835232f, 0.9736442566f, 0.9725843668f, 0.9715039134f, 0.9704028368f, 0.9692812562f, 0.968139112f, 0.9669764638f, 0.9657933712f, 0.9645897746f, 0.9633657932f, 0.9621214271f, 0.9608566165f, 0.9595715404f, 0.9582660794f, 0.9569403529f, 0.9555943608f, 0.9542281032f, 0.9528416395f, 0.9514350295f, 0.9500082731f, 0.9485613704f, 0.9470943809f, 0.9456073046f, 0.9441002607f, 0.9425731897f, 0.9410261512f, 0.9394592047f, 0.9378723502f, 0.9362656474f, 0.9346391559f, 0.932992816f, 0.9313266873f, 0.9296408892f, 0.9279354215f, 0.9262102246f, 0.9244654775f, 0.9227011204f, 0.920917213f, 0.9191138744f, 0.9172909856f, 0.9154487252f, 0.9135870337f, 0.9117060304f, 0.9098057151f, 0.9078860879f, 0.905947268f, 0.903989315f, 0.9020121694f, 0.9000158906f, 0.898000598f, 0.8959662318f, 0.893912971f, 0.8918406963f, 0.8897495866f, 0.8876396418f, 0.8855108619f, 0.8833633661f, 0.8811970949f, 0.8790122271f, 0.8768087029f, 0.8745866418f, 0.8723460436f, 0.8700869679f, 0.8678094745f, 0.8655136228f, 0.8631994128f, 0.8608669639f, 0.8585162163f, 0.8561473489f, 0.8537603021f, 0.851355195f, 0.8489320278f, 0.8464909196f, 0.8440318704f, 0.8415549994f, 0.8390602469f, 0.8365477324f, 0.8340175152f, 0.8314695954f, 0.8289040923f, 0.8263210654f, 0.8237205148f, 0.8211025f, 0.8184671402f, 0.8158144355f, 0.8131443858f, 0.81045717f, 0.8077528477f, 0.8050313592f, 0.8022928238f, 0.7995372415f, 0.796764791f, 0.7939754725f, 0.7911693454f, 0.7883464098f, 0.7855068445f, 0.7826505899f, 0.7797777653f, 0.7768884897f, 0.7739827037f, 0.7710605264f, 0.7681220174f, 0.7651672363f, 0.7621963024f, 0.7592092156f, 0.756205976f, 0.7531868219f, 0.7501516342f, 0.7471005917f, 0.7440337539f, 0.7409511209f, 0.7378528118f, 0.7347388864f, 0.7316094041f, 0.728464365f, 0.7253039479f, 0.7221282125f, 0.718937099f, 
0.7157308459f, 0.7125093937f, 0.7092728019f, 0.7060212493f, 0.7027547359f, 0.6994733214f, 0.696177125f, 0.6928661466f, 0.689540565f, 0.6862003207f, 0.6828455329f, 0.6794763207f, 0.6760926843f, 0.6726947427f, 0.6692826152f, 0.6658562422f, 0.6624158025f, 0.6589612961f, 0.6554928422f, 0.65201056f, 0.64851439f, 0.6450045109f, 0.6414810419f, 0.6379439235f, 0.6343932748f, 0.630829215f, 0.6272518039f, 0.6236611009f, 0.6200572252f, 0.616440177f, 0.6128100753f, 0.6091670394f, 0.6055110693f, 0.6018422246f, 0.5981606841f, 0.5944665074f, 0.5907596946f, 0.5870403647f, 0.5833086371f, 0.5795645714f, 0.5758081675f, 0.5720396042f, 0.5682589412f, 0.564466238f, 0.5606615543f, 0.5568450093f, 0.5530167222f, 0.5491766334f, 0.5453249812f, 0.5414617658f, 0.5375870466f, 0.5337010026f, 0.5298036337f, 0.5258949995f, 0.5219752789f, 0.5180445313f, 0.514102757f, 0.510150075f, 0.5061866641f, 0.5022124648f, 0.4982276559f, 0.4942322969f, 0.4902264774f, 0.4862102866f, 0.4821837842f, 0.4781470597f, 0.4741002023f, 0.4700433314f, 0.4659765065f, 0.4618997872f, 0.4578132927f, 0.4537171125f, 0.449611336f, 0.4454960227f, 0.4413712621f, 0.4372371733f, 0.433093816f, 0.4289412796f, 0.4247796834f, 0.4206090868f, 0.4164295495f, 0.4122412205f, 0.4080441594f, 0.4038384557f, 0.3996241987f, 0.3954014778f, 0.3911703825f, 0.3869310021f, 0.3826834261f, 0.3784277439f, 0.3741640747f, 0.3698924482f, 0.3656129837f, 0.3613258004f, 0.3570309579f, 0.3527285457f, 0.3484186828f, 0.344101429f, 0.3397768736f, 0.3354451358f, 0.3311063051f, 0.3267604411f, 0.3224076927f, 0.3180480897f, 0.3136817515f, 0.3093087673f, 0.3049292266f, 0.3005432487f, 0.296150893f, 0.291752249f, 0.2873474658f, 0.282936573f, 0.27851969f, 0.2740969062f, 0.2696683109f, 0.2652340233f, 0.2607941031f, 0.2563486695f, 0.2518978119f, 0.2474416196f, 0.2429801822f, 0.2385135889f, 0.234041959f, 0.2295653671f, 0.2250839174f, 0.2205976844f, 0.2161068022f, 0.2116113305f, 0.2071113735f, 0.2026070356f, 0.1980984062f, 0.1935855895f, 0.1890686601f, 0.1845477372f, 
0.1800228953f, 0.1754942536f, 0.1709618866f, 0.1664258987f, 0.161886394f, 0.1573434621f, 0.1527971923f, 0.1482476741f, 0.1436950266f, 0.1391393393f, 0.1345807016f, 0.1300192177f, 0.1254549772f, 0.1208880842f, 0.1163186282f, 0.1117467135f, 0.1071724221f, 0.1025958657f, 0.09801714122f, 0.09343633801f, 0.08885355294f, 0.08426889032f, 0.07968243957f, 0.07509429753f, 0.07050457597f, 0.06591334939f, 0.061320737f, 0.05672682077f, 0.05213170499f, 0.04753548279f, 0.0429382585f, 0.03834012151f, 0.0337411724f, 0.02914150804f, 0.02454122901f, 0.01994042844f, 0.01533920597f, 0.01073765941f, 0.006135884672f, 0.001533980132f, -0.003067956772f, -0.007669828832f, -0.01227153838f, -0.01687298715f, -0.02147408016f, -0.02607471868f, -0.030674804f, -0.03527423739f, -0.03987292573f, -0.04447077215f, -0.04906767607f, -0.05366353691f, -0.05825826526f, -0.06285175681f, -0.06744392216f, -0.07203464955f, -0.07662386447f, -0.08121144772f, -0.08579730988f, -0.09038136154f, -0.09496349841f, -0.09954361618f, -0.1041216329f, -0.1086974442f, -0.1132709533f, -0.1178420633f, -0.1224106774f, -0.1269766986f, -0.1315400302f, -0.1361005753f, -0.1406582445f, -0.1452129185f, -0.1497645378f, -0.1543129683f, -0.1588581502f, -0.1633999497f, -0.167938292f, -0.1724730879f, -0.1770042181f, -0.1815316081f, -0.1860551536f, -0.1905747503f, -0.1950903237f, -0.1996017545f, -0.2041089684f, -0.208611846f, -0.2131103128f, -0.2176042795f, -0.2220936269f, -0.2265782654f, -0.2310581058f, -0.2355330586f, -0.2400030196f, -0.2444678992f, -0.2489276081f, -0.2533820271f, -0.2578310966f, -0.2622747123f, -0.266712755f, -0.271145165f, -0.2755718231f, -0.27999264f, -0.2844075263f, -0.2888164222f, -0.2932191491f, -0.2976157069f, -0.3020059466f, -0.3063898087f, -0.310767144f, -0.3151379228f, -0.3195020258f, -0.3238593638f, -0.3282098472f, -0.3325533569f, -0.336889863f, -0.3412192166f, -0.3455413282f, -0.3498561382f, -0.3541635275f, -0.3584634066f, -0.3627557158f, -0.3670403361f, -0.3713172078f, -0.3755861819f, -0.3798471987f, 
-0.3841001987f, -0.3883450329f, -0.3925816715f, -0.3968099952f, -0.4010298848f, -0.4052413106f, -0.4094441533f, -0.4136383235f, -0.4178237021f, -0.4220002592f, -0.4261678755f, -0.4303264916f, -0.4344759583f, -0.438616246f, -0.4427472353f, -0.4468688369f, -0.4509809911f, -0.4550835788f, -0.4591765404f, -0.4632597864f, -0.4673331976f, -0.4713967443f, -0.4754502773f, -0.479493767f, -0.4835270643f, -0.4875501692f, -0.4915629029f, -0.4955652654f, -0.4995571077f, -0.5035383701f, -0.5075089931f, -0.5114688277f, -0.5154178739f, -0.5193560123f, -0.523283124f, -0.5271991491f, -0.5311040282f, -0.534997642f, -0.538879931f, -0.5427507758f, -0.5466101766f, -0.5504579544f, -0.5542941093f, -0.5581185222f, -0.5619311333f, -0.5657318234f, -0.5695205331f, -0.573297143f, -0.5770616531f, -0.5808139443f, -0.584553957f, -0.5882815719f, -0.5919966698f, -0.5956993103f, -0.5993893147f, -0.6030666232f, -0.6067311168f, -0.6103827953f, -0.6140215397f, -0.6176472902f, -0.6212599874f, -0.6248595119f, -0.6284457445f, -0.6320187449f, -0.6355783343f, -0.6391244531f, -0.6426570415f, -0.6461760402f, -0.6496813297f, -0.6531728506f, -0.6566505432f, -0.6601143479f, -0.6635641456f, -0.6669999361f, -0.6704215407f, -0.6738290191f, -0.6772221923f, -0.6806010008f, -0.683965385f, -0.6873153448f, -0.6906507015f, -0.6939714551f, -0.6972774863f, -0.7005687952f, -0.7038452625f}; -constant float tc05[512] = {-0.7071067691f, -0.7103533745f, -0.7135848403f, -0.7168012857f, -0.720002532f, -0.7231884599f, -0.726359129f, -0.72951442f, -0.7326542735f, -0.7357785702f, -0.73888731f, -0.7419804335f, -0.7450577617f, -0.7481193542f, -0.7511651516f, -0.7541949749f, -0.7572088242f, -0.7602066994f, -0.7631884217f, -0.7661539912f, -0.7691033483f, -0.7720363736f, -0.7749531269f, -0.7778534293f, -0.7807372212f, -0.7836045027f, -0.786455214f, -0.7892892361f, -0.7921065688f, -0.7949071527f, -0.7976908684f, -0.8004576564f, -0.8032075167f, -0.8059403896f, -0.8086561561f, -0.8113548756f, -0.8140363097f, -0.8167005777f, -0.8193475008f, 
-0.8219771385f, -0.8245893121f, -0.8271840215f, -0.8297612071f, -0.832320869f, -0.8348628879f, -0.8373872042f, -0.8398938179f, -0.8423826098f, -0.84485358f, -0.8473066092f, -0.8497417569f, -0.8521589041f, -0.854557991f, -0.8569389582f, -0.8593018055f, -0.8616464734f, -0.8639728427f, -0.866280973f, -0.8685706854f, -0.8708420396f, -0.8730949759f, -0.8753293753f, -0.8775452971f, -0.8797426224f, -0.8819212914f, -0.8840812445f, -0.8862225413f, -0.8883450627f, -0.8904487491f, -0.8925335407f, -0.8945994973f, -0.8966464996f, -0.8986744881f, -0.900683403f, -0.9026733041f, -0.9046440721f, -0.9065957069f, -0.9085280895f, -0.9104412794f, -0.9123351574f, -0.9142097831f, -0.9160649776f, -0.9179008007f, -0.919717133f, -0.9215140343f, -0.9232914448f, -0.9250492454f, -0.9267874956f, -0.9285060763f, -0.9302050471f, -0.9318842888f, -0.9335438013f, -0.9351835251f, -0.9368034601f, -0.9384035468f, -0.9399837255f, -0.9415440559f, -0.9430844188f, -0.9446048141f, -0.9461052418f, -0.9475855827f, -0.9490458965f, -0.950486064f, -0.9519061446f, -0.9533060193f, -0.9546857476f, -0.95604527f, -0.9573845267f, -0.9587034583f, -0.9600021243f, -0.9612804651f, -0.9625384808f, -0.963776052f, -0.9649932384f, -0.9661899805f, -0.9673662782f, -0.9685220718f, -0.9696573615f, -0.9707721472f, -0.9718663096f, -0.9729399681f, -0.9739929438f, -0.9750253558f, -0.9760370851f, -0.9770281315f, -0.9779984951f, -0.9789481759f, -0.9798771143f, -0.9807852507f, -0.9816727042f, -0.9825392962f, -0.9833850861f, -0.9842100739f, -0.9850142598f, -0.9857975245f, -0.9865599275f, -0.9873014092f, -0.9880220294f, -0.9887216687f, -0.9894004464f, -0.9900581837f, -0.9906949997f, -0.9913108349f, -0.9919056892f, -0.9924795628f, -0.9930323362f, -0.9935641289f, -0.9940748811f, -0.9945645928f, -0.9950332046f, -0.9954807758f, -0.9959072471f, -0.9963126183f, -0.9966968894f, -0.9970600605f, -0.9974021316f, -0.997723043f, -0.9980228543f, -0.9983015656f, -0.9985590577f, -0.9987954497f, -0.9990106821f, -0.9992047548f, -0.9993776679f, 
-0.9995294213f, -0.9996600151f, -0.9997693896f, -0.9998576641f, -0.9999247193f, -0.9999706149f, -0.9999952912f, -0.9999988079f, -0.9999811649f, -0.9999423623f, -0.9998823404f, -0.9998011589f, -0.9996988177f, -0.9995753169f, -0.9994305968f, -0.9992647767f, -0.9990777373f, -0.9988695383f, -0.9986402392f, -0.9983897209f, -0.9981181026f, -0.9978253245f, -0.9975114465f, -0.9971764088f, -0.996820271f, -0.9964430332f, -0.9960446954f, -0.9956252575f, -0.9951847196f, -0.9947231412f, -0.9942404628f, -0.9937367439f, -0.993211925f, -0.9926661253f, -0.9920992851f, -0.9915114641f, -0.9909026623f, -0.99027282f, -0.9896219969f, -0.9889502525f, -0.988257587f, -0.9875439405f, -0.9868093729f, -0.9860539436f, -0.9852776527f, -0.9844804406f, -0.9836624265f, -0.9828235507f, -0.9819638729f, -0.9810833931f, -0.9801821113f, -0.9792601466f, -0.97831738f, -0.9773538709f, -0.9763697386f, -0.9753648639f, -0.974339366f, -0.9732932448f, -0.9722265005f, -0.971139133f, -0.9700312614f, -0.9689028263f, -0.9677538276f, -0.9665843844f, -0.9653944373f, -0.9641840458f, -0.9629532695f, -0.9617020488f, -0.9604305029f, -0.9591386318f, -0.9578264356f, -0.9564939141f, -0.9551411867f, -0.9537681937f, -0.9523749948f, -0.9509616494f, -0.9495281577f, -0.9480745792f, -0.946600914f, -0.9451072216f, -0.9435934424f, -0.9420597553f, -0.940506041f, -0.9389324784f, -0.9373390079f, -0.9357256889f, -0.9340925217f, -0.9324396253f, -0.9307669401f, -0.9290745854f, -0.9273625016f, -0.9256308079f, -0.9238795042f, -0.9221086502f, -0.9203183055f, -0.9185084105f, -0.9166790843f, -0.914830327f, -0.9129621983f, -0.9110747576f, -0.909168005f, -0.9072420001f, -0.9052967429f, -0.9033323526f, -0.9013488293f, -0.8993462324f, -0.8973245621f, -0.8952839375f, -0.893224299f, -0.8911457658f, -0.8890483379f, -0.8869321346f, -0.8847970963f, -0.882643342f, -0.8804708719f, -0.8782798052f, -0.8760700822f, -0.8738418221f, -0.8715950847f, -0.8693298697f, -0.867046237f, -0.864744246f, -0.8624239564f, -0.8600853682f, -0.8577286005f, -0.8553536534f, 
-0.8529605865f, -0.8505494595f, -0.8481203318f, -0.8456732631f, -0.8432082534f, -0.8407253623f, -0.838224709f, -0.8357062936f, -0.8331701756f, -0.8306164145f, -0.8280450702f, -0.8254561424f, -0.8228498101f, -0.8202259541f, -0.8175848126f, -0.8149263263f, -0.8122506142f, -0.8095576167f, -0.8068475723f, -0.8041203618f, -0.801376164f, -0.7986149788f, -0.7958369255f, -0.7930419445f, -0.7902302146f, -0.7874017358f, -0.7845565677f, -0.7816948295f, -0.7788165212f, -0.7759217024f, -0.7730104327f, -0.7700828314f, -0.7671388984f, -0.7641787529f, -0.761202395f, -0.7582098842f, -0.7552013993f, -0.7521768212f, -0.7491363883f, -0.7460801005f, -0.7430079579f, -0.7399200797f, -0.7368165851f, -0.7336974144f, -0.7305627465f, -0.727412641f, -0.724247098f, -0.7210661769f, -0.7178700566f, -0.7146586776f, -0.7114322186f, -0.7081906199f, -0.7049340606f, -0.7016626f, -0.6983762383f, -0.6950750947f, -0.6917592287f, -0.6884287596f, -0.6850836873f, -0.6817240715f, -0.6783500314f, -0.6749616265f, -0.6715589762f, -0.6681420207f, -0.6647109985f, -0.6612658501f, -0.6578066945f, -0.6543335915f, -0.6508466601f, -0.6473459601f, -0.6438315511f, -0.6403034925f, -0.6367618442f, -0.6332067847f, -0.6296382546f, -0.6260563731f, -0.6224612594f, -0.618852973f, -0.6152315736f, -0.6115971804f, -0.6079497933f, -0.6042895317f, -0.6006164551f, -0.5969306827f, -0.5932322741f, -0.5895212889f, -0.5857978463f, -0.582062006f, -0.5783137679f, -0.5745533705f, -0.5707807541f, -0.566996038f, -0.5631993413f, -0.5593907237f, -0.5555702448f, -0.5517379642f, -0.5478940606f, -0.5440385342f, -0.5401714444f, -0.5362929702f, -0.5324031115f, -0.5285019875f, -0.5245896578f, -0.5206662416f, -0.5167317986f, -0.5127863884f, -0.5088301301f, -0.5048630834f, -0.5008853674f, -0.4968970418f, -0.492898196f, -0.4888888896f, -0.4848692417f, -0.4808393419f, -0.4767992198f, -0.4727490246f, -0.4686888158f, -0.4646186829f, -0.4605387151f, -0.4564489722f, -0.4523495734f, -0.448240608f, -0.4441221356f, -0.4399942756f, -0.4358570874f, 
-0.4317106605f, -0.4275550842f, -0.4233904779f, -0.4192169011f, -0.4150344133f, -0.4108431637f, -0.4066432118f, -0.4024346471f, -0.3982175589f, -0.3939920366f, -0.3897581697f, -0.3855160475f, -0.3812657595f, -0.3770074248f, -0.3727410734f, -0.3684668243f, -0.3641847968f, -0.3598950505f, -0.3555976748f, -0.3512927592f, -0.3469804227f, -0.3426607251f, -0.3383337557f, -0.3339996636f, -0.3296584487f, -0.3253102899f, -0.3209552467f, -0.3165933788f, -0.3122248054f, -0.3078496456f, -0.3034679592f, -0.2990798354f, -0.2946853638f, -0.2902846634f, -0.2858778238f, -0.2814649343f, -0.2770460844f, -0.2726213634f, -0.2681908607f, -0.2637546659f, -0.2593129277f, -0.2548656464f, -0.2504130006f, -0.24595505f, -0.241491884f, -0.2370236069f, -0.2325503081f, -0.228072077f, -0.2235890329f, -0.2191012353f, -0.2146088183f, -0.2101118416f, -0.2056104094f, -0.201104641f, -0.1965945959f, -0.1920803934f, -0.1875621229f, -0.1830398887f, -0.1785137653f, -0.1739838719f, -0.169450298f, -0.1649131179f, -0.1603724509f, -0.1558284014f, -0.1512810439f, -0.1467304677f, -0.1421768069f, -0.1376201212f, -0.1330605298f, -0.1284981072f, -0.1239329726f, -0.1193652153f, -0.1147949249f, -0.1102222055f, -0.1056471542f, -0.1010698602f, -0.09649042785f, -0.09190895408f, -0.08732553571f, -0.08274026215f, -0.07815324515f, -0.07356456667f, -0.06897433102f, -0.06438262761f, -0.05978957191f, -0.05519524589f, -0.05059975013f, -0.04600318149f, -0.04140564054f, -0.03680722415f, -0.03220802546f, -0.02760814503f, -0.02300768159f, -0.01840673015f, -0.01380538847f, -0.009203754365f, -0.004601926077f}; -constant float tc10[512] = {1.0f, 0.9999247193f, 0.9996988177f, 0.9993223548f, 0.9987954497f, 0.9981181026f, 0.9972904325f, 0.9963126183f, 0.9951847196f, 0.9939069748f, 0.9924795628f, 0.9909026623f, 0.9891765118f, 0.9873014092f, 0.9852776527f, 0.9831054807f, 0.9807852507f, 0.97831738f, 0.975702107f, 0.9729399681f, 0.9700312614f, 0.9669764638f, 0.963776052f, 0.9604305029f, 0.9569403529f, 0.9533060193f, 0.9495281577f, 
0.9456073046f, 0.9415440559f, 0.9373390079f, 0.932992816f, 0.9285060763f, 0.9238795042f, 0.9191138744f, 0.9142097831f, 0.909168005f, 0.903989315f, 0.8986744881f, 0.893224299f, 0.8876396418f, 0.8819212914f, 0.8760700822f, 0.8700869679f, 0.8639728427f, 0.8577286005f, 0.851355195f, 0.84485358f, 0.838224709f, 0.8314695954f, 0.8245893121f, 0.8175848126f, 0.81045717f, 0.8032075167f, 0.7958369255f, 0.7883464098f, 0.7807372212f, 0.7730104327f, 0.7651672363f, 0.7572088242f, 0.7491363883f, 0.7409511209f, 0.7326542735f, 0.724247098f, 0.7157308459f, 0.7071067691f, 0.6983762383f, 0.689540565f, 0.6806010008f, 0.6715589762f, 0.6624158025f, 0.6531728506f, 0.6438315511f, 0.6343932748f, 0.6248595119f, 0.6152315736f, 0.6055110693f, 0.5956993103f, 0.5857978463f, 0.5758081675f, 0.5657318234f, 0.5555702448f, 0.5453249812f, 0.534997642f, 0.5245896578f, 0.514102757f, 0.5035383701f, 0.492898196f, 0.4821837842f, 0.4713967443f, 0.4605387151f, 0.449611336f, 0.438616246f, 0.4275550842f, 0.4164295495f, 0.4052413106f, 0.3939920366f, 0.3826834261f, 0.3713172078f, 0.3598950505f, 0.3484186828f, 0.336889863f, 0.3253102899f, 0.3136817515f, 0.3020059466f, 0.2902846634f, 0.27851969f, 0.266712755f, 0.2548656464f, 0.2429801822f, 0.2310581058f, 0.2191012353f, 0.2071113735f, 0.1950903237f, 0.1830398887f, 0.1709618866f, 0.1588581502f, 0.1467304677f, 0.1345807016f, 0.1224106774f, 0.1102222055f, 0.09801714122f, 0.08579730988f, 0.07356456667f, 0.061320737f, 0.04906767607f, 0.03680722415f, 0.02454122901f, 0.01227153838f, 6.123234263e-17f, -0.01227153838f, -0.02454122901f, -0.03680722415f, -0.04906767607f, -0.061320737f, -0.07356456667f, -0.08579730988f, -0.09801714122f, -0.1102222055f, -0.1224106774f, -0.1345807016f, -0.1467304677f, -0.1588581502f, -0.1709618866f, -0.1830398887f, -0.1950903237f, -0.2071113735f, -0.2191012353f, -0.2310581058f, -0.2429801822f, -0.2548656464f, -0.266712755f, -0.27851969f, -0.2902846634f, -0.3020059466f, -0.3136817515f, -0.3253102899f, -0.336889863f, -0.3484186828f, -0.3598950505f, 
-0.3713172078f, -0.3826834261f, -0.3939920366f, -0.4052413106f, -0.4164295495f, -0.4275550842f, -0.438616246f, -0.449611336f, -0.4605387151f, -0.4713967443f, -0.4821837842f, -0.492898196f, -0.5035383701f, -0.514102757f, -0.5245896578f, -0.534997642f, -0.5453249812f, -0.5555702448f, -0.5657318234f, -0.5758081675f, -0.5857978463f, -0.5956993103f, -0.6055110693f, -0.6152315736f, -0.6248595119f, -0.6343932748f, -0.6438315511f, -0.6531728506f, -0.6624158025f, -0.6715589762f, -0.6806010008f, -0.689540565f, -0.6983762383f, -0.7071067691f, -0.7157308459f, -0.724247098f, -0.7326542735f, -0.7409511209f, -0.7491363883f, -0.7572088242f, -0.7651672363f, -0.7730104327f, -0.7807372212f, -0.7883464098f, -0.7958369255f, -0.8032075167f, -0.81045717f, -0.8175848126f, -0.8245893121f, -0.8314695954f, -0.838224709f, -0.84485358f, -0.851355195f, -0.8577286005f, -0.8639728427f, -0.8700869679f, -0.8760700822f, -0.8819212914f, -0.8876396418f, -0.893224299f, -0.8986744881f, -0.903989315f, -0.909168005f, -0.9142097831f, -0.9191138744f, -0.9238795042f, -0.9285060763f, -0.932992816f, -0.9373390079f, -0.9415440559f, -0.9456073046f, -0.9495281577f, -0.9533060193f, -0.9569403529f, -0.9604305029f, -0.963776052f, -0.9669764638f, -0.9700312614f, -0.9729399681f, -0.975702107f, -0.97831738f, -0.9807852507f, -0.9831054807f, -0.9852776527f, -0.9873014092f, -0.9891765118f, -0.9909026623f, -0.9924795628f, -0.9939069748f, -0.9951847196f, -0.9963126183f, -0.9972904325f, -0.9981181026f, -0.9987954497f, -0.9993223548f, -0.9996988177f, -0.9999247193f, 1.0f, 0.9999247193f, 0.9996988177f, 0.9993223548f, 0.9987954497f, 0.9981181026f, 0.9972904325f, 0.9963126183f, 0.9951847196f, 0.9939069748f, 0.9924795628f, 0.9909026623f, 0.9891765118f, 0.9873014092f, 0.9852776527f, 0.9831054807f, 0.9807852507f, 0.97831738f, 0.975702107f, 0.9729399681f, 0.9700312614f, 0.9669764638f, 0.963776052f, 0.9604305029f, 0.9569403529f, 0.9533060193f, 0.9495281577f, 0.9456073046f, 0.9415440559f, 0.9373390079f, 0.932992816f, 0.9285060763f, 
0.9238795042f, 0.9191138744f, 0.9142097831f, 0.909168005f, 0.903989315f, 0.8986744881f, 0.893224299f, 0.8876396418f, 0.8819212914f, 0.8760700822f, 0.8700869679f, 0.8639728427f, 0.8577286005f, 0.851355195f, 0.84485358f, 0.838224709f, 0.8314695954f, 0.8245893121f, 0.8175848126f, 0.81045717f, 0.8032075167f, 0.7958369255f, 0.7883464098f, 0.7807372212f, 0.7730104327f, 0.7651672363f, 0.7572088242f, 0.7491363883f, 0.7409511209f, 0.7326542735f, 0.724247098f, 0.7157308459f, 0.7071067691f, 0.6983762383f, 0.689540565f, 0.6806010008f, 0.6715589762f, 0.6624158025f, 0.6531728506f, 0.6438315511f, 0.6343932748f, 0.6248595119f, 0.6152315736f, 0.6055110693f, 0.5956993103f, 0.5857978463f, 0.5758081675f, 0.5657318234f, 0.5555702448f, 0.5453249812f, 0.534997642f, 0.5245896578f, 0.514102757f, 0.5035383701f, 0.492898196f, 0.4821837842f, 0.4713967443f, 0.4605387151f, 0.449611336f, 0.438616246f, 0.4275550842f, 0.4164295495f, 0.4052413106f, 0.3939920366f, 0.3826834261f, 0.3713172078f, 0.3598950505f, 0.3484186828f, 0.336889863f, 0.3253102899f, 0.3136817515f, 0.3020059466f, 0.2902846634f, 0.27851969f, 0.266712755f, 0.2548656464f, 0.2429801822f, 0.2310581058f, 0.2191012353f, 0.2071113735f, 0.1950903237f, 0.1830398887f, 0.1709618866f, 0.1588581502f, 0.1467304677f, 0.1345807016f, 0.1224106774f, 0.1102222055f, 0.09801714122f, 0.08579730988f, 0.07356456667f, 0.061320737f, 0.04906767607f, 0.03680722415f, 0.02454122901f, 0.01227153838f, 6.123234263e-17f, -0.01227153838f, -0.02454122901f, -0.03680722415f, -0.04906767607f, -0.061320737f, -0.07356456667f, -0.08579730988f, -0.09801714122f, -0.1102222055f, -0.1224106774f, -0.1345807016f, -0.1467304677f, -0.1588581502f, -0.1709618866f, -0.1830398887f, -0.1950903237f, -0.2071113735f, -0.2191012353f, -0.2310581058f, -0.2429801822f, -0.2548656464f, -0.266712755f, -0.27851969f, -0.2902846634f, -0.3020059466f, -0.3136817515f, -0.3253102899f, -0.336889863f, -0.3484186828f, -0.3598950505f, -0.3713172078f, -0.3826834261f, -0.3939920366f, -0.4052413106f, 
-0.4164295495f, -0.4275550842f, -0.438616246f, -0.449611336f, -0.4605387151f, -0.4713967443f, -0.4821837842f, -0.492898196f, -0.5035383701f, -0.514102757f, -0.5245896578f, -0.534997642f, -0.5453249812f, -0.5555702448f, -0.5657318234f, -0.5758081675f, -0.5857978463f, -0.5956993103f, -0.6055110693f, -0.6152315736f, -0.6248595119f, -0.6343932748f, -0.6438315511f, -0.6531728506f, -0.6624158025f, -0.6715589762f, -0.6806010008f, -0.689540565f, -0.6983762383f, -0.7071067691f, -0.7157308459f, -0.724247098f, -0.7326542735f, -0.7409511209f, -0.7491363883f, -0.7572088242f, -0.7651672363f, -0.7730104327f, -0.7807372212f, -0.7883464098f, -0.7958369255f, -0.8032075167f, -0.81045717f, -0.8175848126f, -0.8245893121f, -0.8314695954f, -0.838224709f, -0.84485358f, -0.851355195f, -0.8577286005f, -0.8639728427f, -0.8700869679f, -0.8760700822f, -0.8819212914f, -0.8876396418f, -0.893224299f, -0.8986744881f, -0.903989315f, -0.909168005f, -0.9142097831f, -0.9191138744f, -0.9238795042f, -0.9285060763f, -0.932992816f, -0.9373390079f, -0.9415440559f, -0.9456073046f, -0.9495281577f, -0.9533060193f, -0.9569403529f, -0.9604305029f, -0.963776052f, -0.9669764638f, -0.9700312614f, -0.9729399681f, -0.975702107f, -0.97831738f, -0.9807852507f, -0.9831054807f, -0.9852776527f, -0.9873014092f, -0.9891765118f, -0.9909026623f, -0.9924795628f, -0.9939069748f, -0.9951847196f, -0.9963126183f, -0.9972904325f, -0.9981181026f, -0.9987954497f, -0.9993223548f, -0.9996988177f, -0.9999247193f}; -constant float tc13[512] = {1.0f, 0.9999247193f, 0.9996988177f, 0.9993223548f, 0.9987954497f, 0.9981181026f, 0.9972904325f, 0.9963126183f, 0.9951847196f, 0.9939069748f, 0.9924795628f, 0.9909026623f, 0.9891765118f, 0.9873014092f, 0.9852776527f, 0.9831054807f, 0.9807852507f, 0.97831738f, 0.975702107f, 0.9729399681f, 0.9700312614f, 0.9669764638f, 0.963776052f, 0.9604305029f, 0.9569403529f, 0.9533060193f, 0.9495281577f, 0.9456073046f, 0.9415440559f, 0.9373390079f, 0.932992816f, 0.9285060763f, 0.9238795042f, 0.9191138744f, 
0.9142097831f, 0.909168005f, 0.903989315f, 0.8986744881f, 0.893224299f, 0.8876396418f, 0.8819212914f, 0.8760700822f, 0.8700869679f, 0.8639728427f, 0.8577286005f, 0.851355195f, 0.84485358f, 0.838224709f, 0.8314695954f, 0.8245893121f, 0.8175848126f, 0.81045717f, 0.8032075167f, 0.7958369255f, 0.7883464098f, 0.7807372212f, 0.7730104327f, 0.7651672363f, 0.7572088242f, 0.7491363883f, 0.7409511209f, 0.7326542735f, 0.724247098f, 0.7157308459f, 0.7071067691f, 0.6983762383f, 0.689540565f, 0.6806010008f, 0.6715589762f, 0.6624158025f, 0.6531728506f, 0.6438315511f, 0.6343932748f, 0.6248595119f, 0.6152315736f, 0.6055110693f, 0.5956993103f, 0.5857978463f, 0.5758081675f, 0.5657318234f, 0.5555702448f, 0.5453249812f, 0.534997642f, 0.5245896578f, 0.514102757f, 0.5035383701f, 0.492898196f, 0.4821837842f, 0.4713967443f, 0.4605387151f, 0.449611336f, 0.438616246f, 0.4275550842f, 0.4164295495f, 0.4052413106f, 0.3939920366f, 0.3826834261f, 0.3713172078f, 0.3598950505f, 0.3484186828f, 0.336889863f, 0.3253102899f, 0.3136817515f, 0.3020059466f, 0.2902846634f, 0.27851969f, 0.266712755f, 0.2548656464f, 0.2429801822f, 0.2310581058f, 0.2191012353f, 0.2071113735f, 0.1950903237f, 0.1830398887f, 0.1709618866f, 0.1588581502f, 0.1467304677f, 0.1345807016f, 0.1224106774f, 0.1102222055f, 0.09801714122f, 0.08579730988f, 0.07356456667f, 0.061320737f, 0.04906767607f, 0.03680722415f, 0.02454122901f, 0.01227153838f, 6.123234263e-17f, -0.01227153838f, -0.02454122901f, -0.03680722415f, -0.04906767607f, -0.061320737f, -0.07356456667f, -0.08579730988f, -0.09801714122f, -0.1102222055f, -0.1224106774f, -0.1345807016f, -0.1467304677f, -0.1588581502f, -0.1709618866f, -0.1830398887f, -0.1950903237f, -0.2071113735f, -0.2191012353f, -0.2310581058f, -0.2429801822f, -0.2548656464f, -0.266712755f, -0.27851969f, -0.2902846634f, -0.3020059466f, -0.3136817515f, -0.3253102899f, -0.336889863f, -0.3484186828f, -0.3598950505f, -0.3713172078f, -0.3826834261f, -0.3939920366f, -0.4052413106f, -0.4164295495f, -0.4275550842f, 
-0.438616246f, -0.449611336f, -0.4605387151f, -0.4713967443f, -0.4821837842f, -0.492898196f, -0.5035383701f, -0.514102757f, -0.5245896578f, -0.534997642f, -0.5453249812f, -0.5555702448f, -0.5657318234f, -0.5758081675f, -0.5857978463f, -0.5956993103f, -0.6055110693f, -0.6152315736f, -0.6248595119f, -0.6343932748f, -0.6438315511f, -0.6531728506f, -0.6624158025f, -0.6715589762f, -0.6806010008f, -0.689540565f, -0.6983762383f, -0.7071067691f, -0.7157308459f, -0.724247098f, -0.7326542735f, -0.7409511209f, -0.7491363883f, -0.7572088242f, -0.7651672363f, -0.7730104327f, -0.7807372212f, -0.7883464098f, -0.7958369255f, -0.8032075167f, -0.81045717f, -0.8175848126f, -0.8245893121f, -0.8314695954f, -0.838224709f, -0.84485358f, -0.851355195f, -0.8577286005f, -0.8639728427f, -0.8700869679f, -0.8760700822f, -0.8819212914f, -0.8876396418f, -0.893224299f, -0.8986744881f, -0.903989315f, -0.909168005f, -0.9142097831f, -0.9191138744f, -0.9238795042f, -0.9285060763f, -0.932992816f, -0.9373390079f, -0.9415440559f, -0.9456073046f, -0.9495281577f, -0.9533060193f, -0.9569403529f, -0.9604305029f, -0.963776052f, -0.9669764638f, -0.9700312614f, -0.9729399681f, -0.975702107f, -0.97831738f, -0.9807852507f, -0.9831054807f, -0.9852776527f, -0.9873014092f, -0.9891765118f, -0.9909026623f, -0.9924795628f, -0.9939069748f, -0.9951847196f, -0.9963126183f, -0.9972904325f, -0.9981181026f, -0.9987954497f, -0.9993223548f, -0.9996988177f, -0.9999247193f, 1.0f, 0.9999247193f, 0.9996988177f, 0.9993223548f, 0.9987954497f, 0.9981181026f, 0.9972904325f, 0.9963126183f, 0.9951847196f, 0.9939069748f, 0.9924795628f, 0.9909026623f, 0.9891765118f, 0.9873014092f, 0.9852776527f, 0.9831054807f, 0.9807852507f, 0.97831738f, 0.975702107f, 0.9729399681f, 0.9700312614f, 0.9669764638f, 0.963776052f, 0.9604305029f, 0.9569403529f, 0.9533060193f, 0.9495281577f, 0.9456073046f, 0.9415440559f, 0.9373390079f, 0.932992816f, 0.9285060763f, 0.9238795042f, 0.9191138744f, 0.9142097831f, 0.909168005f, 0.903989315f, 0.8986744881f, 
0.893224299f, 0.8876396418f, 0.8819212914f, 0.8760700822f, 0.8700869679f, 0.8639728427f, 0.8577286005f, 0.851355195f, 0.84485358f, 0.838224709f, 0.8314695954f, 0.8245893121f, 0.8175848126f, 0.81045717f, 0.8032075167f, 0.7958369255f, 0.7883464098f, 0.7807372212f, 0.7730104327f, 0.7651672363f, 0.7572088242f, 0.7491363883f, 0.7409511209f, 0.7326542735f, 0.724247098f, 0.7157308459f, 0.7071067691f, 0.6983762383f, 0.689540565f, 0.6806010008f, 0.6715589762f, 0.6624158025f, 0.6531728506f, 0.6438315511f, 0.6343932748f, 0.6248595119f, 0.6152315736f, 0.6055110693f, 0.5956993103f, 0.5857978463f, 0.5758081675f, 0.5657318234f, 0.5555702448f, 0.5453249812f, 0.534997642f, 0.5245896578f, 0.514102757f, 0.5035383701f, 0.492898196f, 0.4821837842f, 0.4713967443f, 0.4605387151f, 0.449611336f, 0.438616246f, 0.4275550842f, 0.4164295495f, 0.4052413106f, 0.3939920366f, 0.3826834261f, 0.3713172078f, 0.3598950505f, 0.3484186828f, 0.336889863f, 0.3253102899f, 0.3136817515f, 0.3020059466f, 0.2902846634f, 0.27851969f, 0.266712755f, 0.2548656464f, 0.2429801822f, 0.2310581058f, 0.2191012353f, 0.2071113735f, 0.1950903237f, 0.1830398887f, 0.1709618866f, 0.1588581502f, 0.1467304677f, 0.1345807016f, 0.1224106774f, 0.1102222055f, 0.09801714122f, 0.08579730988f, 0.07356456667f, 0.061320737f, 0.04906767607f, 0.03680722415f, 0.02454122901f, 0.01227153838f, 6.123234263e-17f, -0.01227153838f, -0.02454122901f, -0.03680722415f, -0.04906767607f, -0.061320737f, -0.07356456667f, -0.08579730988f, -0.09801714122f, -0.1102222055f, -0.1224106774f, -0.1345807016f, -0.1467304677f, -0.1588581502f, -0.1709618866f, -0.1830398887f, -0.1950903237f, -0.2071113735f, -0.2191012353f, -0.2310581058f, -0.2429801822f, -0.2548656464f, -0.266712755f, -0.27851969f, -0.2902846634f, -0.3020059466f, -0.3136817515f, -0.3253102899f, -0.336889863f, -0.3484186828f, -0.3598950505f, -0.3713172078f, -0.3826834261f, -0.3939920366f, -0.4052413106f, -0.4164295495f, -0.4275550842f, -0.438616246f, -0.449611336f, -0.4605387151f, -0.4713967443f, 
-0.4821837842f, -0.492898196f, -0.5035383701f, -0.514102757f, -0.5245896578f, -0.534997642f, -0.5453249812f, -0.5555702448f, -0.5657318234f, -0.5758081675f, -0.5857978463f, -0.5956993103f, -0.6055110693f, -0.6152315736f, -0.6248595119f, -0.6343932748f, -0.6438315511f, -0.6531728506f, -0.6624158025f, -0.6715589762f, -0.6806010008f, -0.689540565f, -0.6983762383f, -0.7071067691f, -0.7157308459f, -0.724247098f, -0.7326542735f, -0.7409511209f, -0.7491363883f, -0.7572088242f, -0.7651672363f, -0.7730104327f, -0.7807372212f, -0.7883464098f, -0.7958369255f, -0.8032075167f, -0.81045717f, -0.8175848126f, -0.8245893121f, -0.8314695954f, -0.838224709f, -0.84485358f, -0.851355195f, -0.8577286005f, -0.8639728427f, -0.8700869679f, -0.8760700822f, -0.8819212914f, -0.8876396418f, -0.893224299f, -0.8986744881f, -0.903989315f, -0.909168005f, -0.9142097831f, -0.9191138744f, -0.9238795042f, -0.9285060763f, -0.932992816f, -0.9373390079f, -0.9415440559f, -0.9456073046f, -0.9495281577f, -0.9533060193f, -0.9569403529f, -0.9604305029f, -0.963776052f, -0.9669764638f, -0.9700312614f, -0.9729399681f, -0.975702107f, -0.97831738f, -0.9807852507f, -0.9831054807f, -0.9852776527f, -0.9873014092f, -0.9891765118f, -0.9909026623f, -0.9924795628f, -0.9939069748f, -0.9951847196f, -0.9963126183f, -0.9972904325f, -0.9981181026f, -0.9987954497f, -0.9993223548f, -0.9996988177f, -0.9999247193f}; -constant float tc11[512] = {1.0f, 0.9999811649f, 0.9999247193f, 0.9998306036f, 0.9996988177f, 0.9995294213f, 0.9993223548f, 0.9990777373f, 0.9987954497f, 0.9984755516f, 0.9981181026f, 0.997723043f, 0.9972904325f, 0.996820271f, 0.9963126183f, 0.9957674146f, 0.9951847196f, 0.9945645928f, 0.9939069748f, 0.993211925f, 0.9924795628f, 0.9917097688f, 0.9909026623f, 0.9900581837f, 0.9891765118f, 0.988257587f, 0.9873014092f, 0.9863080978f, 0.9852776527f, 0.9842100739f, 0.9831054807f, 0.9819638729f, 0.9807852507f, 0.9795697927f, 0.97831738f, 0.9770281315f, 0.975702107f, 0.974339366f, 0.9729399681f, 0.9715039134f, 
0.9700312614f, 0.9685220718f, 0.9669764638f, 0.9653944373f, 0.963776052f, 0.9621214271f, 0.9604305029f, 0.9587034583f, 0.9569403529f, 0.9551411867f, 0.9533060193f, 0.9514350295f, 0.9495281577f, 0.9475855827f, 0.9456073046f, 0.9435934424f, 0.9415440559f, 0.9394592047f, 0.9373390079f, 0.9351835251f, 0.932992816f, 0.9307669401f, 0.9285060763f, 0.9262102246f, 0.9238795042f, 0.9215140343f, 0.9191138744f, 0.9166790843f, 0.9142097831f, 0.9117060304f, 0.909168005f, 0.9065957069f, 0.903989315f, 0.9013488293f, 0.8986744881f, 0.8959662318f, 0.893224299f, 0.8904487491f, 0.8876396418f, 0.8847970963f, 0.8819212914f, 0.8790122271f, 0.8760700822f, 0.8730949759f, 0.8700869679f, 0.867046237f, 0.8639728427f, 0.8608669639f, 0.8577286005f, 0.854557991f, 0.851355195f, 0.8481203318f, 0.84485358f, 0.8415549994f, 0.838224709f, 0.8348628879f, 0.8314695954f, 0.8280450702f, 0.8245893121f, 0.8211025f, 0.8175848126f, 0.8140363097f, 0.81045717f, 0.8068475723f, 0.8032075167f, 0.7995372415f, 0.7958369255f, 0.7921065688f, 0.7883464098f, 0.7845565677f, 0.7807372212f, 0.7768884897f, 0.7730104327f, 0.7691033483f, 0.7651672363f, 0.761202395f, 0.7572088242f, 0.7531868219f, 0.7491363883f, 0.7450577617f, 0.7409511209f, 0.7368165851f, 0.7326542735f, 0.728464365f, 0.724247098f, 0.720002532f, 0.7157308459f, 0.7114322186f, 0.7071067691f, 0.7027547359f, 0.6983762383f, 0.6939714551f, 0.689540565f, 0.6850836873f, 0.6806010008f, 0.6760926843f, 0.6715589762f, 0.6669999361f, 0.6624158025f, 0.6578066945f, 0.6531728506f, 0.64851439f, 0.6438315511f, 0.6391244531f, 0.6343932748f, 0.6296382546f, 0.6248595119f, 0.6200572252f, 0.6152315736f, 0.6103827953f, 0.6055110693f, 0.6006164551f, 0.5956993103f, 0.5907596946f, 0.5857978463f, 0.5808139443f, 0.5758081675f, 0.5707807541f, 0.5657318234f, 0.5606615543f, 0.5555702448f, 0.5504579544f, 0.5453249812f, 0.5401714444f, 0.534997642f, 0.5298036337f, 0.5245896578f, 0.5193560123f, 0.514102757f, 0.5088301301f, 0.5035383701f, 0.4982276559f, 0.492898196f, 0.4875501692f, 0.4821837842f, 
0.4767992198f, 0.4713967443f, 0.4659765065f, 0.4605387151f, 0.4550835788f, 0.449611336f, 0.4441221356f, 0.438616246f, 0.433093816f, 0.4275550842f, 0.4220002592f, 0.4164295495f, 0.4108431637f, 0.4052413106f, 0.3996241987f, 0.3939920366f, 0.3883450329f, 0.3826834261f, 0.3770074248f, 0.3713172078f, 0.3656129837f, 0.3598950505f, 0.3541635275f, 0.3484186828f, 0.3426607251f, 0.336889863f, 0.3311063051f, 0.3253102899f, 0.3195020258f, 0.3136817515f, 0.3078496456f, 0.3020059466f, 0.296150893f, 0.2902846634f, 0.2844075263f, 0.27851969f, 0.2726213634f, 0.266712755f, 0.2607941031f, 0.2548656464f, 0.2489276081f, 0.2429801822f, 0.2370236069f, 0.2310581058f, 0.2250839174f, 0.2191012353f, 0.2131103128f, 0.2071113735f, 0.201104641f, 0.1950903237f, 0.1890686601f, 0.1830398887f, 0.1770042181f, 0.1709618866f, 0.1649131179f, 0.1588581502f, 0.1527971923f, 0.1467304677f, 0.1406582445f, 0.1345807016f, 0.1284981072f, 0.1224106774f, 0.1163186282f, 0.1102222055f, 0.1041216329f, 0.09801714122f, 0.09190895408f, 0.08579730988f, 0.07968243957f, 0.07356456667f, 0.06744392216f, 0.061320737f, 0.05519524589f, 0.04906767607f, 0.0429382585f, 0.03680722415f, 0.030674804f, 0.02454122901f, 0.01840673015f, 0.01227153838f, 0.006135884672f, 1.0f, 0.9999811649f, 0.9999247193f, 0.9998306036f, 0.9996988177f, 0.9995294213f, 0.9993223548f, 0.9990777373f, 0.9987954497f, 0.9984755516f, 0.9981181026f, 0.997723043f, 0.9972904325f, 0.996820271f, 0.9963126183f, 0.9957674146f, 0.9951847196f, 0.9945645928f, 0.9939069748f, 0.993211925f, 0.9924795628f, 0.9917097688f, 0.9909026623f, 0.9900581837f, 0.9891765118f, 0.988257587f, 0.9873014092f, 0.9863080978f, 0.9852776527f, 0.9842100739f, 0.9831054807f, 0.9819638729f, 0.9807852507f, 0.9795697927f, 0.97831738f, 0.9770281315f, 0.975702107f, 0.974339366f, 0.9729399681f, 0.9715039134f, 0.9700312614f, 0.9685220718f, 0.9669764638f, 0.9653944373f, 0.963776052f, 0.9621214271f, 0.9604305029f, 0.9587034583f, 0.9569403529f, 0.9551411867f, 0.9533060193f, 0.9514350295f, 0.9495281577f, 
0.9475855827f, 0.9456073046f, 0.9435934424f, 0.9415440559f, 0.9394592047f, 0.9373390079f, 0.9351835251f, 0.932992816f, 0.9307669401f, 0.9285060763f, 0.9262102246f, 0.9238795042f, 0.9215140343f, 0.9191138744f, 0.9166790843f, 0.9142097831f, 0.9117060304f, 0.909168005f, 0.9065957069f, 0.903989315f, 0.9013488293f, 0.8986744881f, 0.8959662318f, 0.893224299f, 0.8904487491f, 0.8876396418f, 0.8847970963f, 0.8819212914f, 0.8790122271f, 0.8760700822f, 0.8730949759f, 0.8700869679f, 0.867046237f, 0.8639728427f, 0.8608669639f, 0.8577286005f, 0.854557991f, 0.851355195f, 0.8481203318f, 0.84485358f, 0.8415549994f, 0.838224709f, 0.8348628879f, 0.8314695954f, 0.8280450702f, 0.8245893121f, 0.8211025f, 0.8175848126f, 0.8140363097f, 0.81045717f, 0.8068475723f, 0.8032075167f, 0.7995372415f, 0.7958369255f, 0.7921065688f, 0.7883464098f, 0.7845565677f, 0.7807372212f, 0.7768884897f, 0.7730104327f, 0.7691033483f, 0.7651672363f, 0.761202395f, 0.7572088242f, 0.7531868219f, 0.7491363883f, 0.7450577617f, 0.7409511209f, 0.7368165851f, 0.7326542735f, 0.728464365f, 0.724247098f, 0.720002532f, 0.7157308459f, 0.7114322186f, 0.7071067691f, 0.7027547359f, 0.6983762383f, 0.6939714551f, 0.689540565f, 0.6850836873f, 0.6806010008f, 0.6760926843f, 0.6715589762f, 0.6669999361f, 0.6624158025f, 0.6578066945f, 0.6531728506f, 0.64851439f, 0.6438315511f, 0.6391244531f, 0.6343932748f, 0.6296382546f, 0.6248595119f, 0.6200572252f, 0.6152315736f, 0.6103827953f, 0.6055110693f, 0.6006164551f, 0.5956993103f, 0.5907596946f, 0.5857978463f, 0.5808139443f, 0.5758081675f, 0.5707807541f, 0.5657318234f, 0.5606615543f, 0.5555702448f, 0.5504579544f, 0.5453249812f, 0.5401714444f, 0.534997642f, 0.5298036337f, 0.5245896578f, 0.5193560123f, 0.514102757f, 0.5088301301f, 0.5035383701f, 0.4982276559f, 0.492898196f, 0.4875501692f, 0.4821837842f, 0.4767992198f, 0.4713967443f, 0.4659765065f, 0.4605387151f, 0.4550835788f, 0.449611336f, 0.4441221356f, 0.438616246f, 0.433093816f, 0.4275550842f, 0.4220002592f, 0.4164295495f, 0.4108431637f, 
0.4052413106f, 0.3996241987f, 0.3939920366f, 0.3883450329f, 0.3826834261f, 0.3770074248f, 0.3713172078f, 0.3656129837f, 0.3598950505f, 0.3541635275f, 0.3484186828f, 0.3426607251f, 0.336889863f, 0.3311063051f, 0.3253102899f, 0.3195020258f, 0.3136817515f, 0.3078496456f, 0.3020059466f, 0.296150893f, 0.2902846634f, 0.2844075263f, 0.27851969f, 0.2726213634f, 0.266712755f, 0.2607941031f, 0.2548656464f, 0.2489276081f, 0.2429801822f, 0.2370236069f, 0.2310581058f, 0.2250839174f, 0.2191012353f, 0.2131103128f, 0.2071113735f, 0.201104641f, 0.1950903237f, 0.1890686601f, 0.1830398887f, 0.1770042181f, 0.1709618866f, 0.1649131179f, 0.1588581502f, 0.1527971923f, 0.1467304677f, 0.1406582445f, 0.1345807016f, 0.1284981072f, 0.1224106774f, 0.1163186282f, 0.1102222055f, 0.1041216329f, 0.09801714122f, 0.09190895408f, 0.08579730988f, 0.07968243957f, 0.07356456667f, 0.06744392216f, 0.061320737f, 0.05519524589f, 0.04906767607f, 0.0429382585f, 0.03680722415f, 0.030674804f, 0.02454122901f, 0.01840673015f, 0.01227153838f, 0.006135884672f}; -constant float tc14[512] = {1.0f, 0.9999811649f, 0.9999247193f, 0.9998306036f, 0.9996988177f, 0.9995294213f, 0.9993223548f, 0.9990777373f, 0.9987954497f, 0.9984755516f, 0.9981181026f, 0.997723043f, 0.9972904325f, 0.996820271f, 0.9963126183f, 0.9957674146f, 0.9951847196f, 0.9945645928f, 0.9939069748f, 0.993211925f, 0.9924795628f, 0.9917097688f, 0.9909026623f, 0.9900581837f, 0.9891765118f, 0.988257587f, 0.9873014092f, 0.9863080978f, 0.9852776527f, 0.9842100739f, 0.9831054807f, 0.9819638729f, 0.9807852507f, 0.9795697927f, 0.97831738f, 0.9770281315f, 0.975702107f, 0.974339366f, 0.9729399681f, 0.9715039134f, 0.9700312614f, 0.9685220718f, 0.9669764638f, 0.9653944373f, 0.963776052f, 0.9621214271f, 0.9604305029f, 0.9587034583f, 0.9569403529f, 0.9551411867f, 0.9533060193f, 0.9514350295f, 0.9495281577f, 0.9475855827f, 0.9456073046f, 0.9435934424f, 0.9415440559f, 0.9394592047f, 0.9373390079f, 0.9351835251f, 0.932992816f, 0.9307669401f, 0.9285060763f, 0.9262102246f, 
0.9238795042f, 0.9215140343f, 0.9191138744f, 0.9166790843f, 0.9142097831f, 0.9117060304f, 0.909168005f, 0.9065957069f, 0.903989315f, 0.9013488293f, 0.8986744881f, 0.8959662318f, 0.893224299f, 0.8904487491f, 0.8876396418f, 0.8847970963f, 0.8819212914f, 0.8790122271f, 0.8760700822f, 0.8730949759f, 0.8700869679f, 0.867046237f, 0.8639728427f, 0.8608669639f, 0.8577286005f, 0.854557991f, 0.851355195f, 0.8481203318f, 0.84485358f, 0.8415549994f, 0.838224709f, 0.8348628879f, 0.8314695954f, 0.8280450702f, 0.8245893121f, 0.8211025f, 0.8175848126f, 0.8140363097f, 0.81045717f, 0.8068475723f, 0.8032075167f, 0.7995372415f, 0.7958369255f, 0.7921065688f, 0.7883464098f, 0.7845565677f, 0.7807372212f, 0.7768884897f, 0.7730104327f, 0.7691033483f, 0.7651672363f, 0.761202395f, 0.7572088242f, 0.7531868219f, 0.7491363883f, 0.7450577617f, 0.7409511209f, 0.7368165851f, 0.7326542735f, 0.728464365f, 0.724247098f, 0.720002532f, 0.7157308459f, 0.7114322186f, 0.7071067691f, 0.7027547359f, 0.6983762383f, 0.6939714551f, 0.689540565f, 0.6850836873f, 0.6806010008f, 0.6760926843f, 0.6715589762f, 0.6669999361f, 0.6624158025f, 0.6578066945f, 0.6531728506f, 0.64851439f, 0.6438315511f, 0.6391244531f, 0.6343932748f, 0.6296382546f, 0.6248595119f, 0.6200572252f, 0.6152315736f, 0.6103827953f, 0.6055110693f, 0.6006164551f, 0.5956993103f, 0.5907596946f, 0.5857978463f, 0.5808139443f, 0.5758081675f, 0.5707807541f, 0.5657318234f, 0.5606615543f, 0.5555702448f, 0.5504579544f, 0.5453249812f, 0.5401714444f, 0.534997642f, 0.5298036337f, 0.5245896578f, 0.5193560123f, 0.514102757f, 0.5088301301f, 0.5035383701f, 0.4982276559f, 0.492898196f, 0.4875501692f, 0.4821837842f, 0.4767992198f, 0.4713967443f, 0.4659765065f, 0.4605387151f, 0.4550835788f, 0.449611336f, 0.4441221356f, 0.438616246f, 0.433093816f, 0.4275550842f, 0.4220002592f, 0.4164295495f, 0.4108431637f, 0.4052413106f, 0.3996241987f, 0.3939920366f, 0.3883450329f, 0.3826834261f, 0.3770074248f, 0.3713172078f, 0.3656129837f, 0.3598950505f, 0.3541635275f, 0.3484186828f, 
0.3426607251f, 0.336889863f, 0.3311063051f, 0.3253102899f, 0.3195020258f, 0.3136817515f, 0.3078496456f, 0.3020059466f, 0.296150893f, 0.2902846634f, 0.2844075263f, 0.27851969f, 0.2726213634f, 0.266712755f, 0.2607941031f, 0.2548656464f, 0.2489276081f, 0.2429801822f, 0.2370236069f, 0.2310581058f, 0.2250839174f, 0.2191012353f, 0.2131103128f, 0.2071113735f, 0.201104641f, 0.1950903237f, 0.1890686601f, 0.1830398887f, 0.1770042181f, 0.1709618866f, 0.1649131179f, 0.1588581502f, 0.1527971923f, 0.1467304677f, 0.1406582445f, 0.1345807016f, 0.1284981072f, 0.1224106774f, 0.1163186282f, 0.1102222055f, 0.1041216329f, 0.09801714122f, 0.09190895408f, 0.08579730988f, 0.07968243957f, 0.07356456667f, 0.06744392216f, 0.061320737f, 0.05519524589f, 0.04906767607f, 0.0429382585f, 0.03680722415f, 0.030674804f, 0.02454122901f, 0.01840673015f, 0.01227153838f, 0.006135884672f, 1.0f, 0.9999811649f, 0.9999247193f, 0.9998306036f, 0.9996988177f, 0.9995294213f, 0.9993223548f, 0.9990777373f, 0.9987954497f, 0.9984755516f, 0.9981181026f, 0.997723043f, 0.9972904325f, 0.996820271f, 0.9963126183f, 0.9957674146f, 0.9951847196f, 0.9945645928f, 0.9939069748f, 0.993211925f, 0.9924795628f, 0.9917097688f, 0.9909026623f, 0.9900581837f, 0.9891765118f, 0.988257587f, 0.9873014092f, 0.9863080978f, 0.9852776527f, 0.9842100739f, 0.9831054807f, 0.9819638729f, 0.9807852507f, 0.9795697927f, 0.97831738f, 0.9770281315f, 0.975702107f, 0.974339366f, 0.9729399681f, 0.9715039134f, 0.9700312614f, 0.9685220718f, 0.9669764638f, 0.9653944373f, 0.963776052f, 0.9621214271f, 0.9604305029f, 0.9587034583f, 0.9569403529f, 0.9551411867f, 0.9533060193f, 0.9514350295f, 0.9495281577f, 0.9475855827f, 0.9456073046f, 0.9435934424f, 0.9415440559f, 0.9394592047f, 0.9373390079f, 0.9351835251f, 0.932992816f, 0.9307669401f, 0.9285060763f, 0.9262102246f, 0.9238795042f, 0.9215140343f, 0.9191138744f, 0.9166790843f, 0.9142097831f, 0.9117060304f, 0.909168005f, 0.9065957069f, 0.903989315f, 0.9013488293f, 0.8986744881f, 0.8959662318f, 0.893224299f, 
0.8904487491f, 0.8876396418f, 0.8847970963f, 0.8819212914f, 0.8790122271f, 0.8760700822f, 0.8730949759f, 0.8700869679f, 0.867046237f, 0.8639728427f, 0.8608669639f, 0.8577286005f, 0.854557991f, 0.851355195f, 0.8481203318f, 0.84485358f, 0.8415549994f, 0.838224709f, 0.8348628879f, 0.8314695954f, 0.8280450702f, 0.8245893121f, 0.8211025f, 0.8175848126f, 0.8140363097f, 0.81045717f, 0.8068475723f, 0.8032075167f, 0.7995372415f, 0.7958369255f, 0.7921065688f, 0.7883464098f, 0.7845565677f, 0.7807372212f, 0.7768884897f, 0.7730104327f, 0.7691033483f, 0.7651672363f, 0.761202395f, 0.7572088242f, 0.7531868219f, 0.7491363883f, 0.7450577617f, 0.7409511209f, 0.7368165851f, 0.7326542735f, 0.728464365f, 0.724247098f, 0.720002532f, 0.7157308459f, 0.7114322186f, 0.7071067691f, 0.7027547359f, 0.6983762383f, 0.6939714551f, 0.689540565f, 0.6850836873f, 0.6806010008f, 0.6760926843f, 0.6715589762f, 0.6669999361f, 0.6624158025f, 0.6578066945f, 0.6531728506f, 0.64851439f, 0.6438315511f, 0.6391244531f, 0.6343932748f, 0.6296382546f, 0.6248595119f, 0.6200572252f, 0.6152315736f, 0.6103827953f, 0.6055110693f, 0.6006164551f, 0.5956993103f, 0.5907596946f, 0.5857978463f, 0.5808139443f, 0.5758081675f, 0.5707807541f, 0.5657318234f, 0.5606615543f, 0.5555702448f, 0.5504579544f, 0.5453249812f, 0.5401714444f, 0.534997642f, 0.5298036337f, 0.5245896578f, 0.5193560123f, 0.514102757f, 0.5088301301f, 0.5035383701f, 0.4982276559f, 0.492898196f, 0.4875501692f, 0.4821837842f, 0.4767992198f, 0.4713967443f, 0.4659765065f, 0.4605387151f, 0.4550835788f, 0.449611336f, 0.4441221356f, 0.438616246f, 0.433093816f, 0.4275550842f, 0.4220002592f, 0.4164295495f, 0.4108431637f, 0.4052413106f, 0.3996241987f, 0.3939920366f, 0.3883450329f, 0.3826834261f, 0.3770074248f, 0.3713172078f, 0.3656129837f, 0.3598950505f, 0.3541635275f, 0.3484186828f, 0.3426607251f, 0.336889863f, 0.3311063051f, 0.3253102899f, 0.3195020258f, 0.3136817515f, 0.3078496456f, 0.3020059466f, 0.296150893f, 0.2902846634f, 0.2844075263f, 0.27851969f, 0.2726213634f, 
0.266712755f, 0.2607941031f, 0.2548656464f, 0.2489276081f, 0.2429801822f, 0.2370236069f, 0.2310581058f, 0.2250839174f, 0.2191012353f, 0.2131103128f, 0.2071113735f, 0.201104641f, 0.1950903237f, 0.1890686601f, 0.1830398887f, 0.1770042181f, 0.1709618866f, 0.1649131179f, 0.1588581502f, 0.1527971923f, 0.1467304677f, 0.1406582445f, 0.1345807016f, 0.1284981072f, 0.1224106774f, 0.1163186282f, 0.1102222055f, 0.1041216329f, 0.09801714122f, 0.09190895408f, 0.08579730988f, 0.07968243957f, 0.07356456667f, 0.06744392216f, 0.061320737f, 0.05519524589f, 0.04906767607f, 0.0429382585f, 0.03680722415f, 0.030674804f, 0.02454122901f, 0.01840673015f, 0.01227153838f, 0.006135884672f}; -constant float tc12[512] = {1.0f, 0.9998306036f, 0.9993223548f, 0.9984755516f, 0.9972904325f, 0.9957674146f, 0.9939069748f, 0.9917097688f, 0.9891765118f, 0.9863080978f, 0.9831054807f, 0.9795697927f, 0.975702107f, 0.9715039134f, 0.9669764638f, 0.9621214271f, 0.9569403529f, 0.9514350295f, 0.9456073046f, 0.9394592047f, 0.932992816f, 0.9262102246f, 0.9191138744f, 0.9117060304f, 0.903989315f, 0.8959662318f, 0.8876396418f, 0.8790122271f, 0.8700869679f, 0.8608669639f, 0.851355195f, 0.8415549994f, 0.8314695954f, 0.8211025f, 0.81045717f, 0.7995372415f, 0.7883464098f, 0.7768884897f, 0.7651672363f, 0.7531868219f, 0.7409511209f, 0.728464365f, 0.7157308459f, 0.7027547359f, 0.689540565f, 0.6760926843f, 0.6624158025f, 0.64851439f, 0.6343932748f, 0.6200572252f, 0.6055110693f, 0.5907596946f, 0.5758081675f, 0.5606615543f, 0.5453249812f, 0.5298036337f, 0.514102757f, 0.4982276559f, 0.4821837842f, 0.4659765065f, 0.449611336f, 0.433093816f, 0.4164295495f, 0.3996241987f, 0.3826834261f, 0.3656129837f, 0.3484186828f, 0.3311063051f, 0.3136817515f, 0.296150893f, 0.27851969f, 0.2607941031f, 0.2429801822f, 0.2250839174f, 0.2071113735f, 0.1890686601f, 0.1709618866f, 0.1527971923f, 0.1345807016f, 0.1163186282f, 0.09801714122f, 0.07968243957f, 0.061320737f, 0.0429382585f, 0.02454122901f, 0.006135884672f, -0.01227153838f, -0.030674804f, 
-0.04906767607f, -0.06744392216f, -0.08579730988f, -0.1041216329f, -0.1224106774f, -0.1406582445f, -0.1588581502f, -0.1770042181f, -0.1950903237f, -0.2131103128f, -0.2310581058f, -0.2489276081f, -0.266712755f, -0.2844075263f, -0.3020059466f, -0.3195020258f, -0.336889863f, -0.3541635275f, -0.3713172078f, -0.3883450329f, -0.4052413106f, -0.4220002592f, -0.438616246f, -0.4550835788f, -0.4713967443f, -0.4875501692f, -0.5035383701f, -0.5193560123f, -0.534997642f, -0.5504579544f, -0.5657318234f, -0.5808139443f, -0.5956993103f, -0.6103827953f, -0.6248595119f, -0.6391244531f, -0.6531728506f, -0.6669999361f, -0.6806010008f, -0.6939714551f, -0.7071067691f, -0.720002532f, -0.7326542735f, -0.7450577617f, -0.7572088242f, -0.7691033483f, -0.7807372212f, -0.7921065688f, -0.8032075167f, -0.8140363097f, -0.8245893121f, -0.8348628879f, -0.84485358f, -0.854557991f, -0.8639728427f, -0.8730949759f, -0.8819212914f, -0.8904487491f, -0.8986744881f, -0.9065957069f, -0.9142097831f, -0.9215140343f, -0.9285060763f, -0.9351835251f, -0.9415440559f, -0.9475855827f, -0.9533060193f, -0.9587034583f, -0.963776052f, -0.9685220718f, -0.9729399681f, -0.9770281315f, -0.9807852507f, -0.9842100739f, -0.9873014092f, -0.9900581837f, -0.9924795628f, -0.9945645928f, -0.9963126183f, -0.997723043f, -0.9987954497f, -0.9995294213f, -0.9999247193f, -0.9999811649f, -0.9996988177f, -0.9990777373f, -0.9981181026f, -0.996820271f, -0.9951847196f, -0.993211925f, -0.9909026623f, -0.988257587f, -0.9852776527f, -0.9819638729f, -0.97831738f, -0.974339366f, -0.9700312614f, -0.9653944373f, -0.9604305029f, -0.9551411867f, -0.9495281577f, -0.9435934424f, -0.9373390079f, -0.9307669401f, -0.9238795042f, -0.9166790843f, -0.909168005f, -0.9013488293f, -0.893224299f, -0.8847970963f, -0.8760700822f, -0.867046237f, -0.8577286005f, -0.8481203318f, -0.838224709f, -0.8280450702f, -0.8175848126f, -0.8068475723f, -0.7958369255f, -0.7845565677f, -0.7730104327f, -0.761202395f, -0.7491363883f, -0.7368165851f, -0.724247098f, -0.7114322186f, 
-0.6983762383f, -0.6850836873f, -0.6715589762f, -0.6578066945f, -0.6438315511f, -0.6296382546f, -0.6152315736f, -0.6006164551f, -0.5857978463f, -0.5707807541f, -0.5555702448f, -0.5401714444f, -0.5245896578f, -0.5088301301f, -0.492898196f, -0.4767992198f, -0.4605387151f, -0.4441221356f, -0.4275550842f, -0.4108431637f, -0.3939920366f, -0.3770074248f, -0.3598950505f, -0.3426607251f, -0.3253102899f, -0.3078496456f, -0.2902846634f, -0.2726213634f, -0.2548656464f, -0.2370236069f, -0.2191012353f, -0.201104641f, -0.1830398887f, -0.1649131179f, -0.1467304677f, -0.1284981072f, -0.1102222055f, -0.09190895408f, -0.07356456667f, -0.05519524589f, -0.03680722415f, -0.01840673015f, 1.0f, 0.9998306036f, 0.9993223548f, 0.9984755516f, 0.9972904325f, 0.9957674146f, 0.9939069748f, 0.9917097688f, 0.9891765118f, 0.9863080978f, 0.9831054807f, 0.9795697927f, 0.975702107f, 0.9715039134f, 0.9669764638f, 0.9621214271f, 0.9569403529f, 0.9514350295f, 0.9456073046f, 0.9394592047f, 0.932992816f, 0.9262102246f, 0.9191138744f, 0.9117060304f, 0.903989315f, 0.8959662318f, 0.8876396418f, 0.8790122271f, 0.8700869679f, 0.8608669639f, 0.851355195f, 0.8415549994f, 0.8314695954f, 0.8211025f, 0.81045717f, 0.7995372415f, 0.7883464098f, 0.7768884897f, 0.7651672363f, 0.7531868219f, 0.7409511209f, 0.728464365f, 0.7157308459f, 0.7027547359f, 0.689540565f, 0.6760926843f, 0.6624158025f, 0.64851439f, 0.6343932748f, 0.6200572252f, 0.6055110693f, 0.5907596946f, 0.5758081675f, 0.5606615543f, 0.5453249812f, 0.5298036337f, 0.514102757f, 0.4982276559f, 0.4821837842f, 0.4659765065f, 0.449611336f, 0.433093816f, 0.4164295495f, 0.3996241987f, 0.3826834261f, 0.3656129837f, 0.3484186828f, 0.3311063051f, 0.3136817515f, 0.296150893f, 0.27851969f, 0.2607941031f, 0.2429801822f, 0.2250839174f, 0.2071113735f, 0.1890686601f, 0.1709618866f, 0.1527971923f, 0.1345807016f, 0.1163186282f, 0.09801714122f, 0.07968243957f, 0.061320737f, 0.0429382585f, 0.02454122901f, 0.006135884672f, -0.01227153838f, -0.030674804f, -0.04906767607f, 
-0.06744392216f, -0.08579730988f, -0.1041216329f, -0.1224106774f, -0.1406582445f, -0.1588581502f, -0.1770042181f, -0.1950903237f, -0.2131103128f, -0.2310581058f, -0.2489276081f, -0.266712755f, -0.2844075263f, -0.3020059466f, -0.3195020258f, -0.336889863f, -0.3541635275f, -0.3713172078f, -0.3883450329f, -0.4052413106f, -0.4220002592f, -0.438616246f, -0.4550835788f, -0.4713967443f, -0.4875501692f, -0.5035383701f, -0.5193560123f, -0.534997642f, -0.5504579544f, -0.5657318234f, -0.5808139443f, -0.5956993103f, -0.6103827953f, -0.6248595119f, -0.6391244531f, -0.6531728506f, -0.6669999361f, -0.6806010008f, -0.6939714551f, -0.7071067691f, -0.720002532f, -0.7326542735f, -0.7450577617f, -0.7572088242f, -0.7691033483f, -0.7807372212f, -0.7921065688f, -0.8032075167f, -0.8140363097f, -0.8245893121f, -0.8348628879f, -0.84485358f, -0.854557991f, -0.8639728427f, -0.8730949759f, -0.8819212914f, -0.8904487491f, -0.8986744881f, -0.9065957069f, -0.9142097831f, -0.9215140343f, -0.9285060763f, -0.9351835251f, -0.9415440559f, -0.9475855827f, -0.9533060193f, -0.9587034583f, -0.963776052f, -0.9685220718f, -0.9729399681f, -0.9770281315f, -0.9807852507f, -0.9842100739f, -0.9873014092f, -0.9900581837f, -0.9924795628f, -0.9945645928f, -0.9963126183f, -0.997723043f, -0.9987954497f, -0.9995294213f, -0.9999247193f, -0.9999811649f, -0.9996988177f, -0.9990777373f, -0.9981181026f, -0.996820271f, -0.9951847196f, -0.993211925f, -0.9909026623f, -0.988257587f, -0.9852776527f, -0.9819638729f, -0.97831738f, -0.974339366f, -0.9700312614f, -0.9653944373f, -0.9604305029f, -0.9551411867f, -0.9495281577f, -0.9435934424f, -0.9373390079f, -0.9307669401f, -0.9238795042f, -0.9166790843f, -0.909168005f, -0.9013488293f, -0.893224299f, -0.8847970963f, -0.8760700822f, -0.867046237f, -0.8577286005f, -0.8481203318f, -0.838224709f, -0.8280450702f, -0.8175848126f, -0.8068475723f, -0.7958369255f, -0.7845565677f, -0.7730104327f, -0.761202395f, -0.7491363883f, -0.7368165851f, -0.724247098f, -0.7114322186f, -0.6983762383f, 
-0.6850836873f, -0.6715589762f, -0.6578066945f, -0.6438315511f, -0.6296382546f, -0.6152315736f, -0.6006164551f, -0.5857978463f, -0.5707807541f, -0.5555702448f, -0.5401714444f, -0.5245896578f, -0.5088301301f, -0.492898196f, -0.4767992198f, -0.4605387151f, -0.4441221356f, -0.4275550842f, -0.4108431637f, -0.3939920366f, -0.3770074248f, -0.3598950505f, -0.3426607251f, -0.3253102899f, -0.3078496456f, -0.2902846634f, -0.2726213634f, -0.2548656464f, -0.2370236069f, -0.2191012353f, -0.201104641f, -0.1830398887f, -0.1649131179f, -0.1467304677f, -0.1284981072f, -0.1102222055f, -0.09190895408f, -0.07356456667f, -0.05519524589f, -0.03680722415f, -0.01840673015f}; -constant float tc15[512] = { 1.0f, 0.9998306036f, 0.9993223548f, 0.9984755516f, 0.9972904325f, 0.9957674146f, 0.9939069748f, 0.9917097688f, 0.9891765118f, 0.9863080978f, 0.9831054807f, 0.9795697927f, 0.975702107f, 0.9715039134f, 0.9669764638f, 0.9621214271f, 0.9569403529f, 0.9514350295f, 0.9456073046f, 0.9394592047f, 0.932992816f, 0.9262102246f, 0.9191138744f, 0.9117060304f, 0.903989315f, 0.8959662318f, 0.8876396418f, 0.8790122271f, 0.8700869679f, 0.8608669639f, 0.851355195f, 0.8415549994f, 0.8314695954f, 0.8211025f, 0.81045717f, 0.7995372415f, 0.7883464098f, 0.7768884897f, 0.7651672363f, 0.7531868219f, 0.7409511209f, 0.728464365f, 0.7157308459f, 0.7027547359f, 0.689540565f, 0.6760926843f, 0.6624158025f, 0.64851439f, 0.6343932748f, 0.6200572252f, 0.6055110693f, 0.5907596946f, 0.5758081675f, 0.5606615543f, 0.5453249812f, 0.5298036337f, 0.514102757f, 0.4982276559f, 0.4821837842f, 0.4659765065f, 0.449611336f, 0.433093816f, 0.4164295495f, 0.3996241987f, 0.3826834261f, 0.3656129837f, 0.3484186828f, 0.3311063051f, 0.3136817515f, 0.296150893f, 0.27851969f, 0.2607941031f, 0.2429801822f, 0.2250839174f, 0.2071113735f, 0.1890686601f, 0.1709618866f, 0.1527971923f, 0.1345807016f, 0.1163186282f, 0.09801714122f, 0.07968243957f, 0.061320737f, 0.0429382585f, 0.02454122901f, 0.006135884672f, -0.01227153838f, -0.030674804f, 
-0.04906767607f, -0.06744392216f, -0.08579730988f, -0.1041216329f, -0.1224106774f, -0.1406582445f, -0.1588581502f, -0.1770042181f, -0.1950903237f, -0.2131103128f, -0.2310581058f, -0.2489276081f, -0.266712755f, -0.2844075263f, -0.3020059466f, -0.3195020258f, -0.336889863f, -0.3541635275f, -0.3713172078f, -0.3883450329f, -0.4052413106f, -0.4220002592f, -0.438616246f, -0.4550835788f, -0.4713967443f, -0.4875501692f, -0.5035383701f, -0.5193560123f, -0.534997642f, -0.5504579544f, -0.5657318234f, -0.5808139443f, -0.5956993103f, -0.6103827953f, -0.6248595119f, -0.6391244531f, -0.6531728506f, -0.6669999361f, -0.6806010008f, -0.6939714551f, -0.7071067691f, -0.720002532f, -0.7326542735f, -0.7450577617f, -0.7572088242f, -0.7691033483f, -0.7807372212f, -0.7921065688f, -0.8032075167f, -0.8140363097f, -0.8245893121f, -0.8348628879f, -0.84485358f, -0.854557991f, -0.8639728427f, -0.8730949759f, -0.8819212914f, -0.8904487491f, -0.8986744881f, -0.9065957069f, -0.9142097831f, -0.9215140343f, -0.9285060763f, -0.9351835251f, -0.9415440559f, -0.9475855827f, -0.9533060193f, -0.9587034583f, -0.963776052f, -0.9685220718f, -0.9729399681f, -0.9770281315f, -0.9807852507f, -0.9842100739f, -0.9873014092f, -0.9900581837f, -0.9924795628f, -0.9945645928f, -0.9963126183f, -0.997723043f, -0.9987954497f, -0.9995294213f, -0.9999247193f, -0.9999811649f, -0.9996988177f, -0.9990777373f, -0.9981181026f, -0.996820271f, -0.9951847196f, -0.993211925f, -0.9909026623f, -0.988257587f, -0.9852776527f, -0.9819638729f, -0.97831738f, -0.974339366f, -0.9700312614f, -0.9653944373f, -0.9604305029f, -0.9551411867f, -0.9495281577f, -0.9435934424f, -0.9373390079f, -0.9307669401f, -0.9238795042f, -0.9166790843f, -0.909168005f, -0.9013488293f, -0.893224299f, -0.8847970963f, -0.8760700822f, -0.867046237f, -0.8577286005f, -0.8481203318f, -0.838224709f, -0.8280450702f, -0.8175848126f, -0.8068475723f, -0.7958369255f, -0.7845565677f, -0.7730104327f, -0.761202395f, -0.7491363883f, -0.7368165851f, -0.724247098f, -0.7114322186f, 
-0.6983762383f, -0.6850836873f, -0.6715589762f, -0.6578066945f, -0.6438315511f, -0.6296382546f, -0.6152315736f, -0.6006164551f, -0.5857978463f, -0.5707807541f, -0.5555702448f, -0.5401714444f, -0.5245896578f, -0.5088301301f, -0.492898196f, -0.4767992198f, -0.4605387151f, -0.4441221356f, -0.4275550842f, -0.4108431637f, -0.3939920366f, -0.3770074248f, -0.3598950505f, -0.3426607251f, -0.3253102899f, -0.3078496456f, -0.2902846634f, -0.2726213634f, -0.2548656464f, -0.2370236069f, -0.2191012353f, -0.201104641f, -0.1830398887f, -0.1649131179f, -0.1467304677f, -0.1284981072f, -0.1102222055f, -0.09190895408f, -0.07356456667f, -0.05519524589f, -0.03680722415f, -0.01840673015f, 1.0f, 0.9998306036f, 0.9993223548f, 0.9984755516f, 0.9972904325f, 0.9957674146f, 0.9939069748f, 0.9917097688f, 0.9891765118f, 0.9863080978f, 0.9831054807f, 0.9795697927f, 0.975702107f, 0.9715039134f, 0.9669764638f, 0.9621214271f, 0.9569403529f, 0.9514350295f, 0.9456073046f, 0.9394592047f, 0.932992816f, 0.9262102246f, 0.9191138744f, 0.9117060304f, 0.903989315f, 0.8959662318f, 0.8876396418f, 0.8790122271f, 0.8700869679f, 0.8608669639f, 0.851355195f, 0.8415549994f, 0.8314695954f, 0.8211025f, 0.81045717f, 0.7995372415f, 0.7883464098f, 0.7768884897f, 0.7651672363f, 0.7531868219f, 0.7409511209f, 0.728464365f, 0.7157308459f, 0.7027547359f, 0.689540565f, 0.6760926843f, 0.6624158025f, 0.64851439f, 0.6343932748f, 0.6200572252f, 0.6055110693f, 0.5907596946f, 0.5758081675f, 0.5606615543f, 0.5453249812f, 0.5298036337f, 0.514102757f, 0.4982276559f, 0.4821837842f, 0.4659765065f, 0.449611336f, 0.433093816f, 0.4164295495f, 0.3996241987f, 0.3826834261f, 0.3656129837f, 0.3484186828f, 0.3311063051f, 0.3136817515f, 0.296150893f, 0.27851969f, 0.2607941031f, 0.2429801822f, 0.2250839174f, 0.2071113735f, 0.1890686601f, 0.1709618866f, 0.1527971923f, 0.1345807016f, 0.1163186282f, 0.09801714122f, 0.07968243957f, 0.061320737f, 0.0429382585f, 0.02454122901f, 0.006135884672f, -0.01227153838f, -0.030674804f, -0.04906767607f, 
-0.06744392216f, -0.08579730988f, -0.1041216329f, -0.1224106774f, -0.1406582445f, -0.1588581502f, -0.1770042181f, -0.1950903237f, -0.2131103128f, -0.2310581058f, -0.2489276081f, -0.266712755f, -0.2844075263f, -0.3020059466f, -0.3195020258f, -0.336889863f, -0.3541635275f, -0.3713172078f, -0.3883450329f, -0.4052413106f, -0.4220002592f, -0.438616246f, -0.4550835788f, -0.4713967443f, -0.4875501692f, -0.5035383701f, -0.5193560123f, -0.534997642f, -0.5504579544f, -0.5657318234f, -0.5808139443f, -0.5956993103f, -0.6103827953f, -0.6248595119f, -0.6391244531f, -0.6531728506f, -0.6669999361f, -0.6806010008f, -0.6939714551f, -0.7071067691f, -0.720002532f, -0.7326542735f, -0.7450577617f, -0.7572088242f, -0.7691033483f, -0.7807372212f, -0.7921065688f, -0.8032075167f, -0.8140363097f, -0.8245893121f, -0.8348628879f, -0.84485358f, -0.854557991f, -0.8639728427f, -0.8730949759f, -0.8819212914f, -0.8904487491f, -0.8986744881f, -0.9065957069f, -0.9142097831f, -0.9215140343f, -0.9285060763f, -0.9351835251f, -0.9415440559f, -0.9475855827f, -0.9533060193f, -0.9587034583f, -0.963776052f, -0.9685220718f, -0.9729399681f, -0.9770281315f, -0.9807852507f, -0.9842100739f, -0.9873014092f, -0.9900581837f, -0.9924795628f, -0.9945645928f, -0.9963126183f, -0.997723043f, -0.9987954497f, -0.9995294213f, -0.9999247193f, -0.9999811649f, -0.9996988177f, -0.9990777373f, -0.9981181026f, -0.996820271f, -0.9951847196f, -0.993211925f, -0.9909026623f, -0.988257587f, -0.9852776527f, -0.9819638729f, -0.97831738f, -0.974339366f, -0.9700312614f, -0.9653944373f, -0.9604305029f, -0.9551411867f, -0.9495281577f, -0.9435934424f, -0.9373390079f, -0.9307669401f, -0.9238795042f, -0.9166790843f, -0.909168005f, -0.9013488293f, -0.893224299f, -0.8847970963f, -0.8760700822f, -0.867046237f, -0.8577286005f, -0.8481203318f, -0.838224709f, -0.8280450702f, -0.8175848126f, -0.8068475723f, -0.7958369255f, -0.7845565677f, -0.7730104327f, -0.761202395f, -0.7491363883f, -0.7368165851f, -0.724247098f, -0.7114322186f, -0.6983762383f, 
-0.6850836873f, -0.6715589762f, -0.6578066945f, -0.6438315511f, -0.6296382546f, -0.6152315736f, -0.6006164551f, -0.5857978463f, -0.5707807541f, -0.5555702448f, -0.5401714444f, -0.5245896578f, -0.5088301301f, -0.492898196f, -0.4767992198f, -0.4605387151f, -0.4441221356f, -0.4275550842f, -0.4108431637f, -0.3939920366f, -0.3770074248f, -0.3598950505f, -0.3426607251f, -0.3253102899f, -0.3078496456f, -0.2902846634f, -0.2726213634f, -0.2548656464f, -0.2370236069f, -0.2191012353f, -0.201104641f, -0.1830398887f, -0.1649131179f, -0.1467304677f, -0.1284981072f, -0.1102222055f, -0.09190895408f, -0.07356456667f, -0.05519524589f, -0.03680722415f, -0.01840673015f}; -constant float tc20[512] = {1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 
0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 
0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 
0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 
0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f}; -constant float tc23[512] = {1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 
0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 
0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 
6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, 1.0f, 0.9987954497f, 0.9951847196f, 0.9891765118f, 0.9807852507f, 0.9700312614f, 0.9569403529f, 0.9415440559f, 0.9238795042f, 0.903989315f, 0.8819212914f, 0.8577286005f, 0.8314695954f, 0.8032075167f, 0.7730104327f, 0.7409511209f, 0.7071067691f, 0.6715589762f, 0.6343932748f, 0.5956993103f, 0.5555702448f, 0.514102757f, 0.4713967443f, 0.4275550842f, 0.3826834261f, 0.336889863f, 0.2902846634f, 0.2429801822f, 0.1950903237f, 0.1467304677f, 0.09801714122f, 0.04906767607f, 6.123234263e-17f, 
-0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f}; -constant float tc21[512] = {1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 
0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 
0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 
0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 
0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f}; -constant float tc24[512] = {1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 
0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 
0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 
0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f, 1.0f, 0.9996988177f, 0.9987954497f, 0.9972904325f, 0.9951847196f, 0.9924795628f, 0.9891765118f, 0.9852776527f, 0.9807852507f, 0.975702107f, 0.9700312614f, 0.963776052f, 0.9569403529f, 0.9495281577f, 0.9415440559f, 0.932992816f, 0.9238795042f, 0.9142097831f, 0.903989315f, 0.893224299f, 0.8819212914f, 0.8700869679f, 0.8577286005f, 0.84485358f, 0.8314695954f, 0.8175848126f, 0.8032075167f, 0.7883464098f, 0.7730104327f, 0.7572088242f, 0.7409511209f, 0.724247098f, 0.7071067691f, 0.689540565f, 0.6715589762f, 0.6531728506f, 0.6343932748f, 0.6152315736f, 0.5956993103f, 0.5758081675f, 0.5555702448f, 0.534997642f, 0.514102757f, 0.492898196f, 0.4713967443f, 0.449611336f, 0.4275550842f, 0.4052413106f, 0.3826834261f, 0.3598950505f, 0.336889863f, 0.3136817515f, 0.2902846634f, 0.266712755f, 0.2429801822f, 0.2191012353f, 0.1950903237f, 0.1709618866f, 0.1467304677f, 0.1224106774f, 0.09801714122f, 0.07356456667f, 0.04906767607f, 0.02454122901f}; -constant float tc22[512] = {1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, 
-0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, 
-0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, 
-0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, 
-0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f}; -constant float tc25[512] = {1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, 
-0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, 
-0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, 
-0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, 
-0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f, 1.0f, 0.9972904325f, 0.9891765118f, 0.975702107f, 0.9569403529f, 0.932992816f, 0.903989315f, 0.8700869679f, 0.8314695954f, 0.7883464098f, 0.7409511209f, 0.689540565f, 0.6343932748f, 0.5758081675f, 0.514102757f, 0.449611336f, 0.3826834261f, 0.3136817515f, 0.2429801822f, 0.1709618866f, 0.09801714122f, 0.02454122901f, -0.04906767607f, -0.1224106774f, -0.1950903237f, -0.266712755f, -0.336889863f, -0.4052413106f, -0.4713967443f, -0.534997642f, -0.5956993103f, -0.6531728506f, -0.7071067691f, -0.7572088242f, -0.8032075167f, -0.84485358f, -0.8819212914f, -0.9142097831f, -0.9415440559f, -0.963776052f, -0.9807852507f, -0.9924795628f, -0.9987954497f, -0.9996988177f, -0.9951847196f, -0.9852776527f, -0.9700312614f, -0.9495281577f, -0.9238795042f, -0.893224299f, -0.8577286005f, -0.8175848126f, -0.7730104327f, -0.724247098f, -0.6715589762f, -0.6152315736f, -0.5555702448f, -0.492898196f, -0.4275550842f, -0.3598950505f, -0.2902846634f, -0.2191012353f, -0.1467304677f, -0.07356456667f}; -constant float tc30[512] = {1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 
0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 
0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, 
-0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, 
-0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f}; -constant float tc33[512] = {1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 
0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 
0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, 
-0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, 
-0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, 1.0f, 0.9807852507f, 0.9238795042f, 0.8314695954f, 0.7071067691f, 0.5555702448f, 0.3826834261f, 0.1950903237f, 6.123234263e-17f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f}; -constant float tc31[512] = {1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 
0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 
0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 
0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 
0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f}; -constant float tc34[512] = {1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 
0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 
0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 
0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f, 1.0f, 0.9951847196f, 0.9807852507f, 0.9569403529f, 0.9238795042f, 0.8819212914f, 0.8314695954f, 0.7730104327f, 0.7071067691f, 0.6343932748f, 0.5555702448f, 0.4713967443f, 0.3826834261f, 0.2902846634f, 0.1950903237f, 0.09801714122f}; -constant float tc32[512] = {1.0f, 0.9569403529f, 
0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, 
-0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, 
-0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, 
-0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f}; -constant float tc35[512] = {1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 
1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 
0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, 
-0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, 
-0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f, 1.0f, 0.9569403529f, 0.8314695954f, 0.6343932748f, 0.3826834261f, 0.09801714122f, -0.1950903237f, -0.4713967443f, -0.7071067691f, -0.8819212914f, -0.9807852507f, -0.9951847196f, -0.9238795042f, -0.7730104327f, -0.5555702448f, -0.2902846634f}; -constant float tc40[512] = {1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 
0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 
6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, 
-0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 
1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f}; -constant float tc43[512] = {1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, 
-0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 
1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 
0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f, 1.0f, 0.7071067691f, 6.123234263e-17f, -0.7071067691f}; -constant float tc41[512] = {1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 
0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 
0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 
0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 
0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f}; -constant float tc44[512] = {1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 
0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 
0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 
0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f, 1.0f, 0.9238795042f, 0.7071067691f, 0.3826834261f}; -constant float tc42[512] = {1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, 
-0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 
0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, 
-0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f}; -constant float tc45[512] = {1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, 
-0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, 
-0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 
0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, 
-0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f, 1.0f, 0.3826834261f, -0.7071067691f, -0.9238795042f}; - - - -constant float ts00[512] = {-0.0f, -0.003067956772f, -0.006135884672f, -0.009203754365f, -0.01227153838f, -0.01533920597f, -0.01840673015f, -0.02147408016f, -0.02454122901f, -0.02760814503f, -0.030674804f, -0.0337411724f, -0.03680722415f, -0.03987292573f, -0.0429382585f, -0.04600318149f, -0.04906767607f, -0.05213170499f, -0.05519524589f, -0.05825826526f, -0.061320737f, -0.06438262761f, -0.06744392216f, -0.07050457597f, -0.07356456667f, -0.07662386447f, -0.07968243957f, -0.08274026215f, -0.08579730988f, -0.08885355294f, -0.09190895408f, -0.09496349841f, -0.09801714122f, -0.1010698602f, -0.1041216329f, -0.1071724221f, -0.1102222055f, -0.1132709533f, -0.1163186282f, -0.1193652153f, -0.1224106774f, -0.1254549772f, -0.1284981072f, -0.1315400302f, -0.1345807016f, -0.1376201212f, -0.1406582445f, -0.1436950266f, -0.1467304677f, -0.1497645378f, -0.1527971923f, -0.1558284014f, -0.1588581502f, -0.161886394f, -0.1649131179f, -0.167938292f, -0.1709618866f, -0.1739838719f, -0.1770042181f, -0.1800228953f, -0.1830398887f, -0.1860551536f, -0.1890686601f, -0.1920803934f, -0.1950903237f, -0.1980984062f, -0.201104641f, -0.2041089684f, -0.2071113735f, -0.2101118416f, -0.2131103128f, -0.2161068022f, -0.2191012353f, -0.2220936269f, -0.2250839174f, -0.228072077f, -0.2310581058f, -0.234041959f, -0.2370236069f, -0.2400030196f, -0.2429801822f, -0.24595505f, -0.2489276081f, -0.2518978119f, -0.2548656464f, -0.2578310966f, -0.2607941031f, 
-0.2637546659f, -0.266712755f, -0.2696683109f, -0.2726213634f, -0.2755718231f, -0.27851969f, -0.2814649343f, -0.2844075263f, -0.2873474658f, -0.2902846634f, -0.2932191491f, -0.296150893f, -0.2990798354f, -0.3020059466f, -0.3049292266f, -0.3078496456f, -0.310767144f, -0.3136817515f, -0.3165933788f, -0.3195020258f, -0.3224076927f, -0.3253102899f, -0.3282098472f, -0.3311063051f, -0.3339996636f, -0.336889863f, -0.3397768736f, -0.3426607251f, -0.3455413282f, -0.3484186828f, -0.3512927592f, -0.3541635275f, -0.3570309579f, -0.3598950505f, -0.3627557158f, -0.3656129837f, -0.3684668243f, -0.3713172078f, -0.3741640747f, -0.3770074248f, -0.3798471987f, -0.3826834261f, -0.3855160475f, -0.3883450329f, -0.3911703825f, -0.3939920366f, -0.3968099952f, -0.3996241987f, -0.4024346471f, -0.4052413106f, -0.4080441594f, -0.4108431637f, -0.4136383235f, -0.4164295495f, -0.4192169011f, -0.4220002592f, -0.4247796834f, -0.4275550842f, -0.4303264916f, -0.433093816f, -0.4358570874f, -0.438616246f, -0.4413712621f, -0.4441221356f, -0.4468688369f, -0.449611336f, -0.4523495734f, -0.4550835788f, -0.4578132927f, -0.4605387151f, -0.4632597864f, -0.4659765065f, -0.4686888158f, -0.4713967443f, -0.4741002023f, -0.4767992198f, -0.479493767f, -0.4821837842f, -0.4848692417f, -0.4875501692f, -0.4902264774f, -0.492898196f, -0.4955652654f, -0.4982276559f, -0.5008853674f, -0.5035383701f, -0.5061866641f, -0.5088301301f, -0.5114688277f, -0.514102757f, -0.5167317986f, -0.5193560123f, -0.5219752789f, -0.5245896578f, -0.5271991491f, -0.5298036337f, -0.5324031115f, -0.534997642f, -0.5375870466f, -0.5401714444f, -0.5427507758f, -0.5453249812f, -0.5478940606f, -0.5504579544f, -0.5530167222f, -0.5555702448f, -0.5581185222f, -0.5606615543f, -0.5631993413f, -0.5657318234f, -0.5682589412f, -0.5707807541f, -0.573297143f, -0.5758081675f, -0.5783137679f, -0.5808139443f, -0.5833086371f, -0.5857978463f, -0.5882815719f, -0.5907596946f, -0.5932322741f, -0.5956993103f, -0.5981606841f, -0.6006164551f, -0.6030666232f, 
-0.6055110693f, -0.6079497933f, -0.6103827953f, -0.6128100753f, -0.6152315736f, -0.6176472902f, -0.6200572252f, -0.6224612594f, -0.6248595119f, -0.6272518039f, -0.6296382546f, -0.6320187449f, -0.6343932748f, -0.6367618442f, -0.6391244531f, -0.6414810419f, -0.6438315511f, -0.6461760402f, -0.64851439f, -0.6508466601f, -0.6531728506f, -0.6554928422f, -0.6578066945f, -0.6601143479f, -0.6624158025f, -0.6647109985f, -0.6669999361f, -0.6692826152f, -0.6715589762f, -0.6738290191f, -0.6760926843f, -0.6783500314f, -0.6806010008f, -0.6828455329f, -0.6850836873f, -0.6873153448f, -0.689540565f, -0.6917592287f, -0.6939714551f, -0.696177125f, -0.6983762383f, -0.7005687952f, -0.7027547359f, -0.7049340606f, -0.7071067691f, -0.7092728019f, -0.7114322186f, -0.7135848403f, -0.7157308459f, -0.7178700566f, -0.720002532f, -0.7221282125f, -0.724247098f, -0.726359129f, -0.728464365f, -0.7305627465f, -0.7326542735f, -0.7347388864f, -0.7368165851f, -0.73888731f, -0.7409511209f, -0.7430079579f, -0.7450577617f, -0.7471005917f, -0.7491363883f, -0.7511651516f, -0.7531868219f, -0.7552013993f, -0.7572088242f, -0.7592092156f, -0.761202395f, -0.7631884217f, -0.7651672363f, -0.7671388984f, -0.7691033483f, -0.7710605264f, -0.7730104327f, -0.7749531269f, -0.7768884897f, -0.7788165212f, -0.7807372212f, -0.7826505899f, -0.7845565677f, -0.786455214f, -0.7883464098f, -0.7902302146f, -0.7921065688f, -0.7939754725f, -0.7958369255f, -0.7976908684f, -0.7995372415f, -0.801376164f, -0.8032075167f, -0.8050313592f, -0.8068475723f, -0.8086561561f, -0.81045717f, -0.8122506142f, -0.8140363097f, -0.8158144355f, -0.8175848126f, -0.8193475008f, -0.8211025f, -0.8228498101f, -0.8245893121f, -0.8263210654f, -0.8280450702f, -0.8297612071f, -0.8314695954f, -0.8331701756f, -0.8348628879f, -0.8365477324f, -0.838224709f, -0.8398938179f, -0.8415549994f, -0.8432082534f, -0.84485358f, -0.8464909196f, -0.8481203318f, -0.8497417569f, -0.851355195f, -0.8529605865f, -0.854557991f, -0.8561473489f, -0.8577286005f, -0.8593018055f, 
-0.8608669639f, -0.8624239564f, -0.8639728427f, -0.8655136228f, -0.867046237f, -0.8685706854f, -0.8700869679f, -0.8715950847f, -0.8730949759f, -0.8745866418f, -0.8760700822f, -0.8775452971f, -0.8790122271f, -0.8804708719f, -0.8819212914f, -0.8833633661f, -0.8847970963f, -0.8862225413f, -0.8876396418f, -0.8890483379f, -0.8904487491f, -0.8918406963f, -0.893224299f, -0.8945994973f, -0.8959662318f, -0.8973245621f, -0.8986744881f, -0.9000158906f, -0.9013488293f, -0.9026733041f, -0.903989315f, -0.9052967429f, -0.9065957069f, -0.9078860879f, -0.909168005f, -0.9104412794f, -0.9117060304f, -0.9129621983f, -0.9142097831f, -0.9154487252f, -0.9166790843f, -0.9179008007f, -0.9191138744f, -0.9203183055f, -0.9215140343f, -0.9227011204f, -0.9238795042f, -0.9250492454f, -0.9262102246f, -0.9273625016f, -0.9285060763f, -0.9296408892f, -0.9307669401f, -0.9318842888f, -0.932992816f, -0.9340925217f, -0.9351835251f, -0.9362656474f, -0.9373390079f, -0.9384035468f, -0.9394592047f, -0.940506041f, -0.9415440559f, -0.9425731897f, -0.9435934424f, -0.9446048141f, -0.9456073046f, -0.946600914f, -0.9475855827f, -0.9485613704f, -0.9495281577f, -0.950486064f, -0.9514350295f, -0.9523749948f, -0.9533060193f, -0.9542281032f, -0.9551411867f, -0.95604527f, -0.9569403529f, -0.9578264356f, -0.9587034583f, -0.9595715404f, -0.9604305029f, -0.9612804651f, -0.9621214271f, -0.9629532695f, -0.963776052f, -0.9645897746f, -0.9653944373f, -0.9661899805f, -0.9669764638f, -0.9677538276f, -0.9685220718f, -0.9692812562f, -0.9700312614f, -0.9707721472f, -0.9715039134f, -0.9722265005f, -0.9729399681f, -0.9736442566f, -0.974339366f, -0.9750253558f, -0.975702107f, -0.9763697386f, -0.9770281315f, -0.9776773453f, -0.97831738f, -0.9789481759f, -0.9795697927f, -0.9801821113f, -0.9807852507f, -0.9813792109f, -0.9819638729f, -0.9825392962f, -0.9831054807f, -0.9836624265f, -0.9842100739f, -0.9847484827f, -0.9852776527f, -0.9857975245f, -0.9863080978f, -0.9868093729f, -0.9873014092f, -0.9877841473f, -0.988257587f, -0.9887216687f, 
-0.9891765118f, -0.9896219969f, -0.9900581837f, -0.9904850721f, -0.9909026623f, -0.9913108349f, -0.9917097688f, -0.9920992851f, -0.9924795628f, -0.9928504229f, -0.993211925f, -0.9935641289f, -0.9939069748f, -0.9942404628f, -0.9945645928f, -0.9948793054f, -0.9951847196f, -0.9954807758f, -0.9957674146f, -0.9960446954f, -0.9963126183f, -0.9965711236f, -0.996820271f, -0.9970600605f, -0.9972904325f, -0.9975114465f, -0.997723043f, -0.9979252815f, -0.9981181026f, -0.9983015656f, -0.9984755516f, -0.9986402392f, -0.9987954497f, -0.9989413023f, -0.9990777373f, -0.9992047548f, -0.9993223548f, -0.9994305968f, -0.9995294213f, -0.9996188283f, -0.9996988177f, -0.9997693896f, -0.9998306036f, -0.9998823404f, -0.9999247193f, -0.9999576211f, -0.9999811649f, -0.9999952912f}; -constant float ts03[512] = {-1.0f, -0.9999952912f, -0.9999811649f, -0.9999576211f, -0.9999247193f, -0.9998823404f, -0.9998306036f, -0.9997693896f, -0.9996988177f, -0.9996188283f, -0.9995294213f, -0.9994305968f, -0.9993223548f, -0.9992047548f, -0.9990777373f, -0.9989413023f, -0.9987954497f, -0.9986402392f, -0.9984755516f, -0.9983015656f, -0.9981181026f, -0.9979252815f, -0.997723043f, -0.9975114465f, -0.9972904325f, -0.9970600605f, -0.996820271f, -0.9965711236f, -0.9963126183f, -0.9960446954f, -0.9957674146f, -0.9954807758f, -0.9951847196f, -0.9948793054f, -0.9945645928f, -0.9942404628f, -0.9939069748f, -0.9935641289f, -0.993211925f, -0.9928504229f, -0.9924795628f, -0.9920992851f, -0.9917097688f, -0.9913108349f, -0.9909026623f, -0.9904850721f, -0.9900581837f, -0.9896219969f, -0.9891765118f, -0.9887216687f, -0.988257587f, -0.9877841473f, -0.9873014092f, -0.9868093729f, -0.9863080978f, -0.9857975245f, -0.9852776527f, -0.9847484827f, -0.9842100739f, -0.9836624265f, -0.9831054807f, -0.9825392962f, -0.9819638729f, -0.9813792109f, -0.9807852507f, -0.9801821113f, -0.9795697927f, -0.9789481759f, -0.97831738f, -0.9776773453f, -0.9770281315f, -0.9763697386f, -0.975702107f, -0.9750253558f, -0.974339366f, -0.9736442566f, 
-0.9729399681f, -0.9722265005f, -0.9715039134f, -0.9707721472f, -0.9700312614f, -0.9692812562f, -0.9685220718f, -0.9677538276f, -0.9669764638f, -0.9661899805f, -0.9653944373f, -0.9645897746f, -0.963776052f, -0.9629532695f, -0.9621214271f, -0.9612804651f, -0.9604305029f, -0.9595715404f, -0.9587034583f, -0.9578264356f, -0.9569403529f, -0.95604527f, -0.9551411867f, -0.9542281032f, -0.9533060193f, -0.9523749948f, -0.9514350295f, -0.950486064f, -0.9495281577f, -0.9485613704f, -0.9475855827f, -0.946600914f, -0.9456073046f, -0.9446048141f, -0.9435934424f, -0.9425731897f, -0.9415440559f, -0.940506041f, -0.9394592047f, -0.9384035468f, -0.9373390079f, -0.9362656474f, -0.9351835251f, -0.9340925217f, -0.932992816f, -0.9318842888f, -0.9307669401f, -0.9296408892f, -0.9285060763f, -0.9273625016f, -0.9262102246f, -0.9250492454f, -0.9238795042f, -0.9227011204f, -0.9215140343f, -0.9203183055f, -0.9191138744f, -0.9179008007f, -0.9166790843f, -0.9154487252f, -0.9142097831f, -0.9129621983f, -0.9117060304f, -0.9104412794f, -0.909168005f, -0.9078860879f, -0.9065957069f, -0.9052967429f, -0.903989315f, -0.9026733041f, -0.9013488293f, -0.9000158906f, -0.8986744881f, -0.8973245621f, -0.8959662318f, -0.8945994973f, -0.893224299f, -0.8918406963f, -0.8904487491f, -0.8890483379f, -0.8876396418f, -0.8862225413f, -0.8847970963f, -0.8833633661f, -0.8819212914f, -0.8804708719f, -0.8790122271f, -0.8775452971f, -0.8760700822f, -0.8745866418f, -0.8730949759f, -0.8715950847f, -0.8700869679f, -0.8685706854f, -0.867046237f, -0.8655136228f, -0.8639728427f, -0.8624239564f, -0.8608669639f, -0.8593018055f, -0.8577286005f, -0.8561473489f, -0.854557991f, -0.8529605865f, -0.851355195f, -0.8497417569f, -0.8481203318f, -0.8464909196f, -0.84485358f, -0.8432082534f, -0.8415549994f, -0.8398938179f, -0.838224709f, -0.8365477324f, -0.8348628879f, -0.8331701756f, -0.8314695954f, -0.8297612071f, -0.8280450702f, -0.8263210654f, -0.8245893121f, -0.8228498101f, -0.8211025f, -0.8193475008f, -0.8175848126f, -0.8158144355f, 
-0.8140363097f, -0.8122506142f, -0.81045717f, -0.8086561561f, -0.8068475723f, -0.8050313592f, -0.8032075167f, -0.801376164f, -0.7995372415f, -0.7976908684f, -0.7958369255f, -0.7939754725f, -0.7921065688f, -0.7902302146f, -0.7883464098f, -0.786455214f, -0.7845565677f, -0.7826505899f, -0.7807372212f, -0.7788165212f, -0.7768884897f, -0.7749531269f, -0.7730104327f, -0.7710605264f, -0.7691033483f, -0.7671388984f, -0.7651672363f, -0.7631884217f, -0.761202395f, -0.7592092156f, -0.7572088242f, -0.7552013993f, -0.7531868219f, -0.7511651516f, -0.7491363883f, -0.7471005917f, -0.7450577617f, -0.7430079579f, -0.7409511209f, -0.73888731f, -0.7368165851f, -0.7347388864f, -0.7326542735f, -0.7305627465f, -0.728464365f, -0.726359129f, -0.724247098f, -0.7221282125f, -0.720002532f, -0.7178700566f, -0.7157308459f, -0.7135848403f, -0.7114322186f, -0.7092728019f, -0.7071067691f, -0.7049340606f, -0.7027547359f, -0.7005687952f, -0.6983762383f, -0.696177125f, -0.6939714551f, -0.6917592287f, -0.689540565f, -0.6873153448f, -0.6850836873f, -0.6828455329f, -0.6806010008f, -0.6783500314f, -0.6760926843f, -0.6738290191f, -0.6715589762f, -0.6692826152f, -0.6669999361f, -0.6647109985f, -0.6624158025f, -0.6601143479f, -0.6578066945f, -0.6554928422f, -0.6531728506f, -0.6508466601f, -0.64851439f, -0.6461760402f, -0.6438315511f, -0.6414810419f, -0.6391244531f, -0.6367618442f, -0.6343932748f, -0.6320187449f, -0.6296382546f, -0.6272518039f, -0.6248595119f, -0.6224612594f, -0.6200572252f, -0.6176472902f, -0.6152315736f, -0.6128100753f, -0.6103827953f, -0.6079497933f, -0.6055110693f, -0.6030666232f, -0.6006164551f, -0.5981606841f, -0.5956993103f, -0.5932322741f, -0.5907596946f, -0.5882815719f, -0.5857978463f, -0.5833086371f, -0.5808139443f, -0.5783137679f, -0.5758081675f, -0.573297143f, -0.5707807541f, -0.5682589412f, -0.5657318234f, -0.5631993413f, -0.5606615543f, -0.5581185222f, -0.5555702448f, -0.5530167222f, -0.5504579544f, -0.5478940606f, -0.5453249812f, -0.5427507758f, -0.5401714444f, -0.5375870466f, 
-0.534997642f, -0.5324031115f, -0.5298036337f, -0.5271991491f, -0.5245896578f, -0.5219752789f, -0.5193560123f, -0.5167317986f, -0.514102757f, -0.5114688277f, -0.5088301301f, -0.5061866641f, -0.5035383701f, -0.5008853674f, -0.4982276559f, -0.4955652654f, -0.492898196f, -0.4902264774f, -0.4875501692f, -0.4848692417f, -0.4821837842f, -0.479493767f, -0.4767992198f, -0.4741002023f, -0.4713967443f, -0.4686888158f, -0.4659765065f, -0.4632597864f, -0.4605387151f, -0.4578132927f, -0.4550835788f, -0.4523495734f, -0.449611336f, -0.4468688369f, -0.4441221356f, -0.4413712621f, -0.438616246f, -0.4358570874f, -0.433093816f, -0.4303264916f, -0.4275550842f, -0.4247796834f, -0.4220002592f, -0.4192169011f, -0.4164295495f, -0.4136383235f, -0.4108431637f, -0.4080441594f, -0.4052413106f, -0.4024346471f, -0.3996241987f, -0.3968099952f, -0.3939920366f, -0.3911703825f, -0.3883450329f, -0.3855160475f, -0.3826834261f, -0.3798471987f, -0.3770074248f, -0.3741640747f, -0.3713172078f, -0.3684668243f, -0.3656129837f, -0.3627557158f, -0.3598950505f, -0.3570309579f, -0.3541635275f, -0.3512927592f, -0.3484186828f, -0.3455413282f, -0.3426607251f, -0.3397768736f, -0.336889863f, -0.3339996636f, -0.3311063051f, -0.3282098472f, -0.3253102899f, -0.3224076927f, -0.3195020258f, -0.3165933788f, -0.3136817515f, -0.310767144f, -0.3078496456f, -0.3049292266f, -0.3020059466f, -0.2990798354f, -0.296150893f, -0.2932191491f, -0.2902846634f, -0.2873474658f, -0.2844075263f, -0.2814649343f, -0.27851969f, -0.2755718231f, -0.2726213634f, -0.2696683109f, -0.266712755f, -0.2637546659f, -0.2607941031f, -0.2578310966f, -0.2548656464f, -0.2518978119f, -0.2489276081f, -0.24595505f, -0.2429801822f, -0.2400030196f, -0.2370236069f, -0.234041959f, -0.2310581058f, -0.228072077f, -0.2250839174f, -0.2220936269f, -0.2191012353f, -0.2161068022f, -0.2131103128f, -0.2101118416f, -0.2071113735f, -0.2041089684f, -0.201104641f, -0.1980984062f, -0.1950903237f, -0.1920803934f, -0.1890686601f, -0.1860551536f, -0.1830398887f, -0.1800228953f, 
-0.1770042181f, -0.1739838719f, -0.1709618866f, -0.167938292f, -0.1649131179f, -0.161886394f, -0.1588581502f, -0.1558284014f, -0.1527971923f, -0.1497645378f, -0.1467304677f, -0.1436950266f, -0.1406582445f, -0.1376201212f, -0.1345807016f, -0.1315400302f, -0.1284981072f, -0.1254549772f, -0.1224106774f, -0.1193652153f, -0.1163186282f, -0.1132709533f, -0.1102222055f, -0.1071724221f, -0.1041216329f, -0.1010698602f, -0.09801714122f, -0.09496349841f, -0.09190895408f, -0.08885355294f, -0.08579730988f, -0.08274026215f, -0.07968243957f, -0.07662386447f, -0.07356456667f, -0.07050457597f, -0.06744392216f, -0.06438262761f, -0.061320737f, -0.05825826526f, -0.05519524589f, -0.05213170499f, -0.04906767607f, -0.04600318149f, -0.0429382585f, -0.03987292573f, -0.03680722415f, -0.0337411724f, -0.030674804f, -0.02760814503f, -0.02454122901f, -0.02147408016f, -0.01840673015f, -0.01533920597f, -0.01227153838f, -0.009203754365f, -0.006135884672f, -0.003067956772f}; -constant float ts01[512] = {-0.0f, -0.001533980132f, -0.003067956772f, -0.004601926077f, -0.006135884672f, -0.007669828832f, -0.009203754365f, -0.01073765941f, -0.01227153838f, -0.01380538847f, -0.01533920597f, -0.01687298715f, -0.01840673015f, -0.01994042844f, -0.02147408016f, -0.02300768159f, -0.02454122901f, -0.02607471868f, -0.02760814503f, -0.02914150804f, -0.030674804f, -0.03220802546f, -0.0337411724f, -0.03527423739f, -0.03680722415f, -0.03834012151f, -0.03987292573f, -0.04140564054f, -0.0429382585f, -0.04447077215f, -0.04600318149f, -0.04753548279f, -0.04906767607f, -0.05059975013f, -0.05213170499f, -0.05366353691f, -0.05519524589f, -0.05672682077f, -0.05825826526f, -0.05978957191f, -0.061320737f, -0.06285175681f, -0.06438262761f, -0.06591334939f, -0.06744392216f, -0.06897433102f, -0.07050457597f, -0.07203464955f, -0.07356456667f, -0.07509429753f, -0.07662386447f, -0.07815324515f, -0.07968243957f, -0.08121144772f, -0.08274026215f, -0.08426889032f, -0.08579730988f, -0.08732553571f, -0.08885355294f, -0.09038136154f, 
-0.09190895408f, -0.09343633801f, -0.09496349841f, -0.09649042785f, -0.09801714122f, -0.09954361618f, -0.1010698602f, -0.1025958657f, -0.1041216329f, -0.1056471542f, -0.1071724221f, -0.1086974442f, -0.1102222055f, -0.1117467135f, -0.1132709533f, -0.1147949249f, -0.1163186282f, -0.1178420633f, -0.1193652153f, -0.1208880842f, -0.1224106774f, -0.1239329726f, -0.1254549772f, -0.1269766986f, -0.1284981072f, -0.1300192177f, -0.1315400302f, -0.1330605298f, -0.1345807016f, -0.1361005753f, -0.1376201212f, -0.1391393393f, -0.1406582445f, -0.1421768069f, -0.1436950266f, -0.1452129185f, -0.1467304677f, -0.1482476741f, -0.1497645378f, -0.1512810439f, -0.1527971923f, -0.1543129683f, -0.1558284014f, -0.1573434621f, -0.1588581502f, -0.1603724509f, -0.161886394f, -0.1633999497f, -0.1649131179f, -0.1664258987f, -0.167938292f, -0.169450298f, -0.1709618866f, -0.1724730879f, -0.1739838719f, -0.1754942536f, -0.1770042181f, -0.1785137653f, -0.1800228953f, -0.1815316081f, -0.1830398887f, -0.1845477372f, -0.1860551536f, -0.1875621229f, -0.1890686601f, -0.1905747503f, -0.1920803934f, -0.1935855895f, -0.1950903237f, -0.1965945959f, -0.1980984062f, -0.1996017545f, -0.201104641f, -0.2026070356f, -0.2041089684f, -0.2056104094f, -0.2071113735f, -0.208611846f, -0.2101118416f, -0.2116113305f, -0.2131103128f, -0.2146088183f, -0.2161068022f, -0.2176042795f, -0.2191012353f, -0.2205976844f, -0.2220936269f, -0.2235890329f, -0.2250839174f, -0.2265782654f, -0.228072077f, -0.2295653671f, -0.2310581058f, -0.2325503081f, -0.234041959f, -0.2355330586f, -0.2370236069f, -0.2385135889f, -0.2400030196f, -0.241491884f, -0.2429801822f, -0.2444678992f, -0.24595505f, -0.2474416196f, -0.2489276081f, -0.2504130006f, -0.2518978119f, -0.2533820271f, -0.2548656464f, -0.2563486695f, -0.2578310966f, -0.2593129277f, -0.2607941031f, -0.2622747123f, -0.2637546659f, -0.2652340233f, -0.266712755f, -0.2681908607f, -0.2696683109f, -0.271145165f, -0.2726213634f, -0.2740969062f, -0.2755718231f, -0.2770460844f, -0.27851969f, 
-0.27999264f, -0.2814649343f, -0.282936573f, -0.2844075263f, -0.2858778238f, -0.2873474658f, -0.2888164222f, -0.2902846634f, -0.291752249f, -0.2932191491f, -0.2946853638f, -0.296150893f, -0.2976157069f, -0.2990798354f, -0.3005432487f, -0.3020059466f, -0.3034679592f, -0.3049292266f, -0.3063898087f, -0.3078496456f, -0.3093087673f, -0.310767144f, -0.3122248054f, -0.3136817515f, -0.3151379228f, -0.3165933788f, -0.3180480897f, -0.3195020258f, -0.3209552467f, -0.3224076927f, -0.3238593638f, -0.3253102899f, -0.3267604411f, -0.3282098472f, -0.3296584487f, -0.3311063051f, -0.3325533569f, -0.3339996636f, -0.3354451358f, -0.336889863f, -0.3383337557f, -0.3397768736f, -0.3412192166f, -0.3426607251f, -0.344101429f, -0.3455413282f, -0.3469804227f, -0.3484186828f, -0.3498561382f, -0.3512927592f, -0.3527285457f, -0.3541635275f, -0.3555976748f, -0.3570309579f, -0.3584634066f, -0.3598950505f, -0.3613258004f, -0.3627557158f, -0.3641847968f, -0.3656129837f, -0.3670403361f, -0.3684668243f, -0.3698924482f, -0.3713172078f, -0.3727410734f, -0.3741640747f, -0.3755861819f, -0.3770074248f, -0.3784277439f, -0.3798471987f, -0.3812657595f, -0.3826834261f, -0.3841001987f, -0.3855160475f, -0.3869310021f, -0.3883450329f, -0.3897581697f, -0.3911703825f, -0.3925816715f, -0.3939920366f, -0.3954014778f, -0.3968099952f, -0.3982175589f, -0.3996241987f, -0.4010298848f, -0.4024346471f, -0.4038384557f, -0.4052413106f, -0.4066432118f, -0.4080441594f, -0.4094441533f, -0.4108431637f, -0.4122412205f, -0.4136383235f, -0.4150344133f, -0.4164295495f, -0.4178237021f, -0.4192169011f, -0.4206090868f, -0.4220002592f, -0.4233904779f, -0.4247796834f, -0.4261678755f, -0.4275550842f, -0.4289412796f, -0.4303264916f, -0.4317106605f, -0.433093816f, -0.4344759583f, -0.4358570874f, -0.4372371733f, -0.438616246f, -0.4399942756f, -0.4413712621f, -0.4427472353f, -0.4441221356f, -0.4454960227f, -0.4468688369f, -0.448240608f, -0.449611336f, -0.4509809911f, -0.4523495734f, -0.4537171125f, -0.4550835788f, -0.4564489722f, 
-0.4578132927f, -0.4591765404f, -0.4605387151f, -0.4618997872f, -0.4632597864f, -0.4646186829f, -0.4659765065f, -0.4673331976f, -0.4686888158f, -0.4700433314f, -0.4713967443f, -0.4727490246f, -0.4741002023f, -0.4754502773f, -0.4767992198f, -0.4781470597f, -0.479493767f, -0.4808393419f, -0.4821837842f, -0.4835270643f, -0.4848692417f, -0.4862102866f, -0.4875501692f, -0.4888888896f, -0.4902264774f, -0.4915629029f, -0.492898196f, -0.4942322969f, -0.4955652654f, -0.4968970418f, -0.4982276559f, -0.4995571077f, -0.5008853674f, -0.5022124648f, -0.5035383701f, -0.5048630834f, -0.5061866641f, -0.5075089931f, -0.5088301301f, -0.510150075f, -0.5114688277f, -0.5127863884f, -0.514102757f, -0.5154178739f, -0.5167317986f, -0.5180445313f, -0.5193560123f, -0.5206662416f, -0.5219752789f, -0.523283124f, -0.5245896578f, -0.5258949995f, -0.5271991491f, -0.5285019875f, -0.5298036337f, -0.5311040282f, -0.5324031115f, -0.5337010026f, -0.534997642f, -0.5362929702f, -0.5375870466f, -0.538879931f, -0.5401714444f, -0.5414617658f, -0.5427507758f, -0.5440385342f, -0.5453249812f, -0.5466101766f, -0.5478940606f, -0.5491766334f, -0.5504579544f, -0.5517379642f, -0.5530167222f, -0.5542941093f, -0.5555702448f, -0.5568450093f, -0.5581185222f, -0.5593907237f, -0.5606615543f, -0.5619311333f, -0.5631993413f, -0.564466238f, -0.5657318234f, -0.566996038f, -0.5682589412f, -0.5695205331f, -0.5707807541f, -0.5720396042f, -0.573297143f, -0.5745533705f, -0.5758081675f, -0.5770616531f, -0.5783137679f, -0.5795645714f, -0.5808139443f, -0.582062006f, -0.5833086371f, -0.584553957f, -0.5857978463f, -0.5870403647f, -0.5882815719f, -0.5895212889f, -0.5907596946f, -0.5919966698f, -0.5932322741f, -0.5944665074f, -0.5956993103f, -0.5969306827f, -0.5981606841f, -0.5993893147f, -0.6006164551f, -0.6018422246f, -0.6030666232f, -0.6042895317f, -0.6055110693f, -0.6067311168f, -0.6079497933f, -0.6091670394f, -0.6103827953f, -0.6115971804f, -0.6128100753f, -0.6140215397f, -0.6152315736f, -0.616440177f, -0.6176472902f, 
-0.618852973f, -0.6200572252f, -0.6212599874f, -0.6224612594f, -0.6236611009f, -0.6248595119f, -0.6260563731f, -0.6272518039f, -0.6284457445f, -0.6296382546f, -0.630829215f, -0.6320187449f, -0.6332067847f, -0.6343932748f, -0.6355783343f, -0.6367618442f, -0.6379439235f, -0.6391244531f, -0.6403034925f, -0.6414810419f, -0.6426570415f, -0.6438315511f, -0.6450045109f, -0.6461760402f, -0.6473459601f, -0.64851439f, -0.6496813297f, -0.6508466601f, -0.65201056f, -0.6531728506f, -0.6543335915f, -0.6554928422f, -0.6566505432f, -0.6578066945f, -0.6589612961f, -0.6601143479f, -0.6612658501f, -0.6624158025f, -0.6635641456f, -0.6647109985f, -0.6658562422f, -0.6669999361f, -0.6681420207f, -0.6692826152f, -0.6704215407f, -0.6715589762f, -0.6726947427f, -0.6738290191f, -0.6749616265f, -0.6760926843f, -0.6772221923f, -0.6783500314f, -0.6794763207f, -0.6806010008f, -0.6817240715f, -0.6828455329f, -0.683965385f, -0.6850836873f, -0.6862003207f, -0.6873153448f, -0.6884287596f, -0.689540565f, -0.6906507015f, -0.6917592287f, -0.6928661466f, -0.6939714551f, -0.6950750947f, -0.696177125f, -0.6972774863f, -0.6983762383f, -0.6994733214f, -0.7005687952f, -0.7016626f, -0.7027547359f, -0.7038452625f, -0.7049340606f, -0.7060212493f}; -constant float ts04[512] = {-0.7071067691f, -0.7081906199f, -0.7092728019f, -0.7103533745f, -0.7114322186f, -0.7125093937f, -0.7135848403f, -0.7146586776f, -0.7157308459f, -0.7168012857f, -0.7178700566f, -0.718937099f, -0.720002532f, -0.7210661769f, -0.7221282125f, -0.7231884599f, -0.724247098f, -0.7253039479f, -0.726359129f, -0.727412641f, -0.728464365f, -0.72951442f, -0.7305627465f, -0.7316094041f, -0.7326542735f, -0.7336974144f, -0.7347388864f, -0.7357785702f, -0.7368165851f, -0.7378528118f, -0.73888731f, -0.7399200797f, -0.7409511209f, -0.7419804335f, -0.7430079579f, -0.7440337539f, -0.7450577617f, -0.7460801005f, -0.7471005917f, -0.7481193542f, -0.7491363883f, -0.7501516342f, -0.7511651516f, -0.7521768212f, -0.7531868219f, -0.7541949749f, -0.7552013993f, 
-0.756205976f, -0.7572088242f, -0.7582098842f, -0.7592092156f, -0.7602066994f, -0.761202395f, -0.7621963024f, -0.7631884217f, -0.7641787529f, -0.7651672363f, -0.7661539912f, -0.7671388984f, -0.7681220174f, -0.7691033483f, -0.7700828314f, -0.7710605264f, -0.7720363736f, -0.7730104327f, -0.7739827037f, -0.7749531269f, -0.7759217024f, -0.7768884897f, -0.7778534293f, -0.7788165212f, -0.7797777653f, -0.7807372212f, -0.7816948295f, -0.7826505899f, -0.7836045027f, -0.7845565677f, -0.7855068445f, -0.786455214f, -0.7874017358f, -0.7883464098f, -0.7892892361f, -0.7902302146f, -0.7911693454f, -0.7921065688f, -0.7930419445f, -0.7939754725f, -0.7949071527f, -0.7958369255f, -0.796764791f, -0.7976908684f, -0.7986149788f, -0.7995372415f, -0.8004576564f, -0.801376164f, -0.8022928238f, -0.8032075167f, -0.8041203618f, -0.8050313592f, -0.8059403896f, -0.8068475723f, -0.8077528477f, -0.8086561561f, -0.8095576167f, -0.81045717f, -0.8113548756f, -0.8122506142f, -0.8131443858f, -0.8140363097f, -0.8149263263f, -0.8158144355f, -0.8167005777f, -0.8175848126f, -0.8184671402f, -0.8193475008f, -0.8202259541f, -0.8211025f, -0.8219771385f, -0.8228498101f, -0.8237205148f, -0.8245893121f, -0.8254561424f, -0.8263210654f, -0.8271840215f, -0.8280450702f, -0.8289040923f, -0.8297612071f, -0.8306164145f, -0.8314695954f, -0.832320869f, -0.8331701756f, -0.8340175152f, -0.8348628879f, -0.8357062936f, -0.8365477324f, -0.8373872042f, -0.838224709f, -0.8390602469f, -0.8398938179f, -0.8407253623f, -0.8415549994f, -0.8423826098f, -0.8432082534f, -0.8440318704f, -0.84485358f, -0.8456732631f, -0.8464909196f, -0.8473066092f, -0.8481203318f, -0.8489320278f, -0.8497417569f, -0.8505494595f, -0.851355195f, -0.8521589041f, -0.8529605865f, -0.8537603021f, -0.854557991f, -0.8553536534f, -0.8561473489f, -0.8569389582f, -0.8577286005f, -0.8585162163f, -0.8593018055f, -0.8600853682f, -0.8608669639f, -0.8616464734f, -0.8624239564f, -0.8631994128f, -0.8639728427f, -0.864744246f, -0.8655136228f, -0.866280973f, -0.867046237f, 
-0.8678094745f, -0.8685706854f, -0.8693298697f, -0.8700869679f, -0.8708420396f, -0.8715950847f, -0.8723460436f, -0.8730949759f, -0.8738418221f, -0.8745866418f, -0.8753293753f, -0.8760700822f, -0.8768087029f, -0.8775452971f, -0.8782798052f, -0.8790122271f, -0.8797426224f, -0.8804708719f, -0.8811970949f, -0.8819212914f, -0.882643342f, -0.8833633661f, -0.8840812445f, -0.8847970963f, -0.8855108619f, -0.8862225413f, -0.8869321346f, -0.8876396418f, -0.8883450627f, -0.8890483379f, -0.8897495866f, -0.8904487491f, -0.8911457658f, -0.8918406963f, -0.8925335407f, -0.893224299f, -0.893912971f, -0.8945994973f, -0.8952839375f, -0.8959662318f, -0.8966464996f, -0.8973245621f, -0.898000598f, -0.8986744881f, -0.8993462324f, -0.9000158906f, -0.900683403f, -0.9013488293f, -0.9020121694f, -0.9026733041f, -0.9033323526f, -0.903989315f, -0.9046440721f, -0.9052967429f, -0.905947268f, -0.9065957069f, -0.9072420001f, -0.9078860879f, -0.9085280895f, -0.909168005f, -0.9098057151f, -0.9104412794f, -0.9110747576f, -0.9117060304f, -0.9123351574f, -0.9129621983f, -0.9135870337f, -0.9142097831f, -0.914830327f, -0.9154487252f, -0.9160649776f, -0.9166790843f, -0.9172909856f, -0.9179008007f, -0.9185084105f, -0.9191138744f, -0.919717133f, -0.9203183055f, -0.920917213f, -0.9215140343f, -0.9221086502f, -0.9227011204f, -0.9232914448f, -0.9238795042f, -0.9244654775f, -0.9250492454f, -0.9256308079f, -0.9262102246f, -0.9267874956f, -0.9273625016f, -0.9279354215f, -0.9285060763f, -0.9290745854f, -0.9296408892f, -0.9302050471f, -0.9307669401f, -0.9313266873f, -0.9318842888f, -0.9324396253f, -0.932992816f, -0.9335438013f, -0.9340925217f, -0.9346391559f, -0.9351835251f, -0.9357256889f, -0.9362656474f, -0.9368034601f, -0.9373390079f, -0.9378723502f, -0.9384035468f, -0.9389324784f, -0.9394592047f, -0.9399837255f, -0.940506041f, -0.9410261512f, -0.9415440559f, -0.9420597553f, -0.9425731897f, -0.9430844188f, -0.9435934424f, -0.9441002607f, -0.9446048141f, -0.9451072216f, -0.9456073046f, -0.9461052418f, 
-0.946600914f, -0.9470943809f, -0.9475855827f, -0.9480745792f, -0.9485613704f, -0.9490458965f, -0.9495281577f, -0.9500082731f, -0.950486064f, -0.9509616494f, -0.9514350295f, -0.9519061446f, -0.9523749948f, -0.9528416395f, -0.9533060193f, -0.9537681937f, -0.9542281032f, -0.9546857476f, -0.9551411867f, -0.9555943608f, -0.95604527f, -0.9564939141f, -0.9569403529f, -0.9573845267f, -0.9578264356f, -0.9582660794f, -0.9587034583f, -0.9591386318f, -0.9595715404f, -0.9600021243f, -0.9604305029f, -0.9608566165f, -0.9612804651f, -0.9617020488f, -0.9621214271f, -0.9625384808f, -0.9629532695f, -0.9633657932f, -0.963776052f, -0.9641840458f, -0.9645897746f, -0.9649932384f, -0.9653944373f, -0.9657933712f, -0.9661899805f, -0.9665843844f, -0.9669764638f, -0.9673662782f, -0.9677538276f, -0.968139112f, -0.9685220718f, -0.9689028263f, -0.9692812562f, -0.9696573615f, -0.9700312614f, -0.9704028368f, -0.9707721472f, -0.971139133f, -0.9715039134f, -0.9718663096f, -0.9722265005f, -0.9725843668f, -0.9729399681f, -0.9732932448f, -0.9736442566f, -0.9739929438f, -0.974339366f, -0.9746835232f, -0.9750253558f, -0.9753648639f, -0.975702107f, -0.9760370851f, -0.9763697386f, -0.9767000675f, -0.9770281315f, -0.9773538709f, -0.9776773453f, -0.9779984951f, -0.97831738f, -0.9786339402f, -0.9789481759f, -0.9792601466f, -0.9795697927f, -0.9798771143f, -0.9801821113f, -0.9804848433f, -0.9807852507f, -0.9810833931f, -0.9813792109f, -0.9816727042f, -0.9819638729f, -0.982252717f, -0.9825392962f, -0.9828235507f, -0.9831054807f, -0.9833850861f, -0.9836624265f, -0.9839374423f, -0.9842100739f, -0.9844804406f, -0.9847484827f, -0.9850142598f, -0.9852776527f, -0.9855387211f, -0.9857975245f, -0.9860539436f, -0.9863080978f, -0.9865599275f, -0.9868093729f, -0.9870565534f, -0.9873014092f, -0.9875439405f, -0.9877841473f, -0.9880220294f, -0.988257587f, -0.9884908199f, -0.9887216687f, -0.9889502525f, -0.9891765118f, -0.9894004464f, -0.9896219969f, -0.9898412824f, -0.9900581837f, -0.99027282f, -0.9904850721f, 
-0.9906949997f, -0.9909026623f, -0.9911079407f, -0.9913108349f, -0.9915114641f, -0.9917097688f, -0.9919056892f, -0.9920992851f, -0.992290616f, -0.9924795628f, -0.9926661253f, -0.9928504229f, -0.9930323362f, -0.993211925f, -0.9933891892f, -0.9935641289f, -0.9937367439f, -0.9939069748f, -0.9940748811f, -0.9942404628f, -0.9944036603f, -0.9945645928f, -0.9947231412f, -0.9948793054f, -0.9950332046f, -0.9951847196f, -0.99533391f, -0.9954807758f, -0.9956252575f, -0.9957674146f, -0.9959072471f, -0.9960446954f, -0.9961798191f, -0.9963126183f, -0.9964430332f, -0.9965711236f, -0.9966968894f, -0.996820271f, -0.996941328f, -0.9970600605f, -0.9971764088f, -0.9972904325f, -0.9974021316f, -0.9975114465f, -0.9976184368f, -0.997723043f, -0.9978253245f, -0.9979252815f, -0.9980228543f, -0.9981181026f, -0.9982110262f, -0.9983015656f, -0.9983897209f, -0.9984755516f, -0.9985590577f, -0.9986402392f, -0.9987190366f, -0.9987954497f, -0.9988695383f, -0.9989413023f, -0.9990106821f, -0.9990777373f, -0.9991424084f, -0.9992047548f, -0.9992647767f, -0.9993223548f, -0.9993776679f, -0.9994305968f, -0.9994812012f, -0.9995294213f, -0.9995753169f, -0.9996188283f, -0.9996600151f, -0.9996988177f, -0.9997352958f, -0.9997693896f, -0.9998011589f, -0.9998306036f, -0.9998576641f, -0.9998823404f, -0.9999046922f, -0.9999247193f, -0.9999423623f, -0.9999576211f, -0.9999706149f, -0.9999811649f, -0.9999893904f, -0.9999952912f, -0.9999988079f}; -constant float ts02[512] = {-0.0f, -0.004601926077f, -0.009203754365f, -0.01380538847f, -0.01840673015f, -0.02300768159f, -0.02760814503f, -0.03220802546f, -0.03680722415f, -0.04140564054f, -0.04600318149f, -0.05059975013f, -0.05519524589f, -0.05978957191f, -0.06438262761f, -0.06897433102f, -0.07356456667f, -0.07815324515f, -0.08274026215f, -0.08732553571f, -0.09190895408f, -0.09649042785f, -0.1010698602f, -0.1056471542f, -0.1102222055f, -0.1147949249f, -0.1193652153f, -0.1239329726f, -0.1284981072f, -0.1330605298f, -0.1376201212f, -0.1421768069f, -0.1467304677f, 
-0.1512810439f, -0.1558284014f, -0.1603724509f, -0.1649131179f, -0.169450298f, -0.1739838719f, -0.1785137653f, -0.1830398887f, -0.1875621229f, -0.1920803934f, -0.1965945959f, -0.201104641f, -0.2056104094f, -0.2101118416f, -0.2146088183f, -0.2191012353f, -0.2235890329f, -0.228072077f, -0.2325503081f, -0.2370236069f, -0.241491884f, -0.24595505f, -0.2504130006f, -0.2548656464f, -0.2593129277f, -0.2637546659f, -0.2681908607f, -0.2726213634f, -0.2770460844f, -0.2814649343f, -0.2858778238f, -0.2902846634f, -0.2946853638f, -0.2990798354f, -0.3034679592f, -0.3078496456f, -0.3122248054f, -0.3165933788f, -0.3209552467f, -0.3253102899f, -0.3296584487f, -0.3339996636f, -0.3383337557f, -0.3426607251f, -0.3469804227f, -0.3512927592f, -0.3555976748f, -0.3598950505f, -0.3641847968f, -0.3684668243f, -0.3727410734f, -0.3770074248f, -0.3812657595f, -0.3855160475f, -0.3897581697f, -0.3939920366f, -0.3982175589f, -0.4024346471f, -0.4066432118f, -0.4108431637f, -0.4150344133f, -0.4192169011f, -0.4233904779f, -0.4275550842f, -0.4317106605f, -0.4358570874f, -0.4399942756f, -0.4441221356f, -0.448240608f, -0.4523495734f, -0.4564489722f, -0.4605387151f, -0.4646186829f, -0.4686888158f, -0.4727490246f, -0.4767992198f, -0.4808393419f, -0.4848692417f, -0.4888888896f, -0.492898196f, -0.4968970418f, -0.5008853674f, -0.5048630834f, -0.5088301301f, -0.5127863884f, -0.5167317986f, -0.5206662416f, -0.5245896578f, -0.5285019875f, -0.5324031115f, -0.5362929702f, -0.5401714444f, -0.5440385342f, -0.5478940606f, -0.5517379642f, -0.5555702448f, -0.5593907237f, -0.5631993413f, -0.566996038f, -0.5707807541f, -0.5745533705f, -0.5783137679f, -0.582062006f, -0.5857978463f, -0.5895212889f, -0.5932322741f, -0.5969306827f, -0.6006164551f, -0.6042895317f, -0.6079497933f, -0.6115971804f, -0.6152315736f, -0.618852973f, -0.6224612594f, -0.6260563731f, -0.6296382546f, -0.6332067847f, -0.6367618442f, -0.6403034925f, -0.6438315511f, -0.6473459601f, -0.6508466601f, -0.6543335915f, -0.6578066945f, -0.6612658501f, 
-0.6647109985f, -0.6681420207f, -0.6715589762f, -0.6749616265f, -0.6783500314f, -0.6817240715f, -0.6850836873f, -0.6884287596f, -0.6917592287f, -0.6950750947f, -0.6983762383f, -0.7016626f, -0.7049340606f, -0.7081906199f, -0.7114322186f, -0.7146586776f, -0.7178700566f, -0.7210661769f, -0.724247098f, -0.727412641f, -0.7305627465f, -0.7336974144f, -0.7368165851f, -0.7399200797f, -0.7430079579f, -0.7460801005f, -0.7491363883f, -0.7521768212f, -0.7552013993f, -0.7582098842f, -0.761202395f, -0.7641787529f, -0.7671388984f, -0.7700828314f, -0.7730104327f, -0.7759217024f, -0.7788165212f, -0.7816948295f, -0.7845565677f, -0.7874017358f, -0.7902302146f, -0.7930419445f, -0.7958369255f, -0.7986149788f, -0.801376164f, -0.8041203618f, -0.8068475723f, -0.8095576167f, -0.8122506142f, -0.8149263263f, -0.8175848126f, -0.8202259541f, -0.8228498101f, -0.8254561424f, -0.8280450702f, -0.8306164145f, -0.8331701756f, -0.8357062936f, -0.838224709f, -0.8407253623f, -0.8432082534f, -0.8456732631f, -0.8481203318f, -0.8505494595f, -0.8529605865f, -0.8553536534f, -0.8577286005f, -0.8600853682f, -0.8624239564f, -0.864744246f, -0.867046237f, -0.8693298697f, -0.8715950847f, -0.8738418221f, -0.8760700822f, -0.8782798052f, -0.8804708719f, -0.882643342f, -0.8847970963f, -0.8869321346f, -0.8890483379f, -0.8911457658f, -0.893224299f, -0.8952839375f, -0.8973245621f, -0.8993462324f, -0.9013488293f, -0.9033323526f, -0.9052967429f, -0.9072420001f, -0.909168005f, -0.9110747576f, -0.9129621983f, -0.914830327f, -0.9166790843f, -0.9185084105f, -0.9203183055f, -0.9221086502f, -0.9238795042f, -0.9256308079f, -0.9273625016f, -0.9290745854f, -0.9307669401f, -0.9324396253f, -0.9340925217f, -0.9357256889f, -0.9373390079f, -0.9389324784f, -0.940506041f, -0.9420597553f, -0.9435934424f, -0.9451072216f, -0.946600914f, -0.9480745792f, -0.9495281577f, -0.9509616494f, -0.9523749948f, -0.9537681937f, -0.9551411867f, -0.9564939141f, -0.9578264356f, -0.9591386318f, -0.9604305029f, -0.9617020488f, -0.9629532695f, -0.9641840458f, 
-0.9653944373f, -0.9665843844f, -0.9677538276f, -0.9689028263f, -0.9700312614f, -0.971139133f, -0.9722265005f, -0.9732932448f, -0.974339366f, -0.9753648639f, -0.9763697386f, -0.9773538709f, -0.97831738f, -0.9792601466f, -0.9801821113f, -0.9810833931f, -0.9819638729f, -0.9828235507f, -0.9836624265f, -0.9844804406f, -0.9852776527f, -0.9860539436f, -0.9868093729f, -0.9875439405f, -0.988257587f, -0.9889502525f, -0.9896219969f, -0.99027282f, -0.9909026623f, -0.9915114641f, -0.9920992851f, -0.9926661253f, -0.993211925f, -0.9937367439f, -0.9942404628f, -0.9947231412f, -0.9951847196f, -0.9956252575f, -0.9960446954f, -0.9964430332f, -0.996820271f, -0.9971764088f, -0.9975114465f, -0.9978253245f, -0.9981181026f, -0.9983897209f, -0.9986402392f, -0.9988695383f, -0.9990777373f, -0.9992647767f, -0.9994305968f, -0.9995753169f, -0.9996988177f, -0.9998011589f, -0.9998823404f, -0.9999423623f, -0.9999811649f, -0.9999988079f, -0.9999952912f, -0.9999706149f, -0.9999247193f, -0.9998576641f, -0.9997693896f, -0.9996600151f, -0.9995294213f, -0.9993776679f, -0.9992047548f, -0.9990106821f, -0.9987954497f, -0.9985590577f, -0.9983015656f, -0.9980228543f, -0.997723043f, -0.9974021316f, -0.9970600605f, -0.9966968894f, -0.9963126183f, -0.9959072471f, -0.9954807758f, -0.9950332046f, -0.9945645928f, -0.9940748811f, -0.9935641289f, -0.9930323362f, -0.9924795628f, -0.9919056892f, -0.9913108349f, -0.9906949997f, -0.9900581837f, -0.9894004464f, -0.9887216687f, -0.9880220294f, -0.9873014092f, -0.9865599275f, -0.9857975245f, -0.9850142598f, -0.9842100739f, -0.9833850861f, -0.9825392962f, -0.9816727042f, -0.9807852507f, -0.9798771143f, -0.9789481759f, -0.9779984951f, -0.9770281315f, -0.9760370851f, -0.9750253558f, -0.9739929438f, -0.9729399681f, -0.9718663096f, -0.9707721472f, -0.9696573615f, -0.9685220718f, -0.9673662782f, -0.9661899805f, -0.9649932384f, -0.963776052f, -0.9625384808f, -0.9612804651f, -0.9600021243f, -0.9587034583f, -0.9573845267f, -0.95604527f, -0.9546857476f, -0.9533060193f, 
-0.9519061446f, -0.950486064f, -0.9490458965f, -0.9475855827f, -0.9461052418f, -0.9446048141f, -0.9430844188f, -0.9415440559f, -0.9399837255f, -0.9384035468f, -0.9368034601f, -0.9351835251f, -0.9335438013f, -0.9318842888f, -0.9302050471f, -0.9285060763f, -0.9267874956f, -0.9250492454f, -0.9232914448f, -0.9215140343f, -0.919717133f, -0.9179008007f, -0.9160649776f, -0.9142097831f, -0.9123351574f, -0.9104412794f, -0.9085280895f, -0.9065957069f, -0.9046440721f, -0.9026733041f, -0.900683403f, -0.8986744881f, -0.8966464996f, -0.8945994973f, -0.8925335407f, -0.8904487491f, -0.8883450627f, -0.8862225413f, -0.8840812445f, -0.8819212914f, -0.8797426224f, -0.8775452971f, -0.8753293753f, -0.8730949759f, -0.8708420396f, -0.8685706854f, -0.866280973f, -0.8639728427f, -0.8616464734f, -0.8593018055f, -0.8569389582f, -0.854557991f, -0.8521589041f, -0.8497417569f, -0.8473066092f, -0.84485358f, -0.8423826098f, -0.8398938179f, -0.8373872042f, -0.8348628879f, -0.832320869f, -0.8297612071f, -0.8271840215f, -0.8245893121f, -0.8219771385f, -0.8193475008f, -0.8167005777f, -0.8140363097f, -0.8113548756f, -0.8086561561f, -0.8059403896f, -0.8032075167f, -0.8004576564f, -0.7976908684f, -0.7949071527f, -0.7921065688f, -0.7892892361f, -0.786455214f, -0.7836045027f, -0.7807372212f, -0.7778534293f, -0.7749531269f, -0.7720363736f, -0.7691033483f, -0.7661539912f, -0.7631884217f, -0.7602066994f, -0.7572088242f, -0.7541949749f, -0.7511651516f, -0.7481193542f, -0.7450577617f, -0.7419804335f, -0.73888731f, -0.7357785702f, -0.7326542735f, -0.72951442f, -0.726359129f, -0.7231884599f, -0.720002532f, -0.7168012857f, -0.7135848403f, -0.7103533745f}; -constant float ts05[512] = {-0.7071067691f, -0.7038452625f, -0.7005687952f, -0.6972774863f, -0.6939714551f, -0.6906507015f, -0.6873153448f, -0.683965385f, -0.6806010008f, -0.6772221923f, -0.6738290191f, -0.6704215407f, -0.6669999361f, -0.6635641456f, -0.6601143479f, -0.6566505432f, -0.6531728506f, -0.6496813297f, -0.6461760402f, -0.6426570415f, -0.6391244531f, 
-0.6355783343f, -0.6320187449f, -0.6284457445f, -0.6248595119f, -0.6212599874f, -0.6176472902f, -0.6140215397f, -0.6103827953f, -0.6067311168f, -0.6030666232f, -0.5993893147f, -0.5956993103f, -0.5919966698f, -0.5882815719f, -0.584553957f, -0.5808139443f, -0.5770616531f, -0.573297143f, -0.5695205331f, -0.5657318234f, -0.5619311333f, -0.5581185222f, -0.5542941093f, -0.5504579544f, -0.5466101766f, -0.5427507758f, -0.538879931f, -0.534997642f, -0.5311040282f, -0.5271991491f, -0.523283124f, -0.5193560123f, -0.5154178739f, -0.5114688277f, -0.5075089931f, -0.5035383701f, -0.4995571077f, -0.4955652654f, -0.4915629029f, -0.4875501692f, -0.4835270643f, -0.479493767f, -0.4754502773f, -0.4713967443f, -0.4673331976f, -0.4632597864f, -0.4591765404f, -0.4550835788f, -0.4509809911f, -0.4468688369f, -0.4427472353f, -0.438616246f, -0.4344759583f, -0.4303264916f, -0.4261678755f, -0.4220002592f, -0.4178237021f, -0.4136383235f, -0.4094441533f, -0.4052413106f, -0.4010298848f, -0.3968099952f, -0.3925816715f, -0.3883450329f, -0.3841001987f, -0.3798471987f, -0.3755861819f, -0.3713172078f, -0.3670403361f, -0.3627557158f, -0.3584634066f, -0.3541635275f, -0.3498561382f, -0.3455413282f, -0.3412192166f, -0.336889863f, -0.3325533569f, -0.3282098472f, -0.3238593638f, -0.3195020258f, -0.3151379228f, -0.310767144f, -0.3063898087f, -0.3020059466f, -0.2976157069f, -0.2932191491f, -0.2888164222f, -0.2844075263f, -0.27999264f, -0.2755718231f, -0.271145165f, -0.266712755f, -0.2622747123f, -0.2578310966f, -0.2533820271f, -0.2489276081f, -0.2444678992f, -0.2400030196f, -0.2355330586f, -0.2310581058f, -0.2265782654f, -0.2220936269f, -0.2176042795f, -0.2131103128f, -0.208611846f, -0.2041089684f, -0.1996017545f, -0.1950903237f, -0.1905747503f, -0.1860551536f, -0.1815316081f, -0.1770042181f, -0.1724730879f, -0.167938292f, -0.1633999497f, -0.1588581502f, -0.1543129683f, -0.1497645378f, -0.1452129185f, -0.1406582445f, -0.1361005753f, -0.1315400302f, -0.1269766986f, -0.1224106774f, -0.1178420633f, 
-0.1132709533f, -0.1086974442f, -0.1041216329f, -0.09954361618f, -0.09496349841f, -0.09038136154f, -0.08579730988f, -0.08121144772f, -0.07662386447f, -0.07203464955f, -0.06744392216f, -0.06285175681f, -0.05825826526f, -0.05366353691f, -0.04906767607f, -0.04447077215f, -0.03987292573f, -0.03527423739f, -0.030674804f, -0.02607471868f, -0.02147408016f, -0.01687298715f, -0.01227153838f, -0.007669828832f, -0.003067956772f, 0.001533980132f, 0.006135884672f, 0.01073765941f, 0.01533920597f, 0.01994042844f, 0.02454122901f, 0.02914150804f, 0.0337411724f, 0.03834012151f, 0.0429382585f, 0.04753548279f, 0.05213170499f, 0.05672682077f, 0.061320737f, 0.06591334939f, 0.07050457597f, 0.07509429753f, 0.07968243957f, 0.08426889032f, 0.08885355294f, 0.09343633801f, 0.09801714122f, 0.1025958657f, 0.1071724221f, 0.1117467135f, 0.1163186282f, 0.1208880842f, 0.1254549772f, 0.1300192177f, 0.1345807016f, 0.1391393393f, 0.1436950266f, 0.1482476741f, 0.1527971923f, 0.1573434621f, 0.161886394f, 0.1664258987f, 0.1709618866f, 0.1754942536f, 0.1800228953f, 0.1845477372f, 0.1890686601f, 0.1935855895f, 0.1980984062f, 0.2026070356f, 0.2071113735f, 0.2116113305f, 0.2161068022f, 0.2205976844f, 0.2250839174f, 0.2295653671f, 0.234041959f, 0.2385135889f, 0.2429801822f, 0.2474416196f, 0.2518978119f, 0.2563486695f, 0.2607941031f, 0.2652340233f, 0.2696683109f, 0.2740969062f, 0.27851969f, 0.282936573f, 0.2873474658f, 0.291752249f, 0.296150893f, 0.3005432487f, 0.3049292266f, 0.3093087673f, 0.3136817515f, 0.3180480897f, 0.3224076927f, 0.3267604411f, 0.3311063051f, 0.3354451358f, 0.3397768736f, 0.344101429f, 0.3484186828f, 0.3527285457f, 0.3570309579f, 0.3613258004f, 0.3656129837f, 0.3698924482f, 0.3741640747f, 0.3784277439f, 0.3826834261f, 0.3869310021f, 0.3911703825f, 0.3954014778f, 0.3996241987f, 0.4038384557f, 0.4080441594f, 0.4122412205f, 0.4164295495f, 0.4206090868f, 0.4247796834f, 0.4289412796f, 0.433093816f, 0.4372371733f, 0.4413712621f, 0.4454960227f, 0.449611336f, 0.4537171125f, 0.4578132927f, 
0.4618997872f, 0.4659765065f, 0.4700433314f, 0.4741002023f, 0.4781470597f, 0.4821837842f, 0.4862102866f, 0.4902264774f, 0.4942322969f, 0.4982276559f, 0.5022124648f, 0.5061866641f, 0.510150075f, 0.514102757f, 0.5180445313f, 0.5219752789f, 0.5258949995f, 0.5298036337f, 0.5337010026f, 0.5375870466f, 0.5414617658f, 0.5453249812f, 0.5491766334f, 0.5530167222f, 0.5568450093f, 0.5606615543f, 0.564466238f, 0.5682589412f, 0.5720396042f, 0.5758081675f, 0.5795645714f, 0.5833086371f, 0.5870403647f, 0.5907596946f, 0.5944665074f, 0.5981606841f, 0.6018422246f, 0.6055110693f, 0.6091670394f, 0.6128100753f, 0.616440177f, 0.6200572252f, 0.6236611009f, 0.6272518039f, 0.630829215f, 0.6343932748f, 0.6379439235f, 0.6414810419f, 0.6450045109f, 0.64851439f, 0.65201056f, 0.6554928422f, 0.6589612961f, 0.6624158025f, 0.6658562422f, 0.6692826152f, 0.6726947427f, 0.6760926843f, 0.6794763207f, 0.6828455329f, 0.6862003207f, 0.689540565f, 0.6928661466f, 0.696177125f, 0.6994733214f, 0.7027547359f, 0.7060212493f, 0.7092728019f, 0.7125093937f, 0.7157308459f, 0.718937099f, 0.7221282125f, 0.7253039479f, 0.728464365f, 0.7316094041f, 0.7347388864f, 0.7378528118f, 0.7409511209f, 0.7440337539f, 0.7471005917f, 0.7501516342f, 0.7531868219f, 0.756205976f, 0.7592092156f, 0.7621963024f, 0.7651672363f, 0.7681220174f, 0.7710605264f, 0.7739827037f, 0.7768884897f, 0.7797777653f, 0.7826505899f, 0.7855068445f, 0.7883464098f, 0.7911693454f, 0.7939754725f, 0.796764791f, 0.7995372415f, 0.8022928238f, 0.8050313592f, 0.8077528477f, 0.81045717f, 0.8131443858f, 0.8158144355f, 0.8184671402f, 0.8211025f, 0.8237205148f, 0.8263210654f, 0.8289040923f, 0.8314695954f, 0.8340175152f, 0.8365477324f, 0.8390602469f, 0.8415549994f, 0.8440318704f, 0.8464909196f, 0.8489320278f, 0.851355195f, 0.8537603021f, 0.8561473489f, 0.8585162163f, 0.8608669639f, 0.8631994128f, 0.8655136228f, 0.8678094745f, 0.8700869679f, 0.8723460436f, 0.8745866418f, 0.8768087029f, 0.8790122271f, 0.8811970949f, 0.8833633661f, 0.8855108619f, 0.8876396418f, 
0.8897495866f, 0.8918406963f, 0.893912971f, 0.8959662318f, 0.898000598f, 0.9000158906f, 0.9020121694f, 0.903989315f, 0.905947268f, 0.9078860879f, 0.9098057151f, 0.9117060304f, 0.9135870337f, 0.9154487252f, 0.9172909856f, 0.9191138744f, 0.920917213f, 0.9227011204f, 0.9244654775f, 0.9262102246f, 0.9279354215f, 0.9296408892f, 0.9313266873f, 0.932992816f, 0.9346391559f, 0.9362656474f, 0.9378723502f, 0.9394592047f, 0.9410261512f, 0.9425731897f, 0.9441002607f, 0.9456073046f, 0.9470943809f, 0.9485613704f, 0.9500082731f, 0.9514350295f, 0.9528416395f, 0.9542281032f, 0.9555943608f, 0.9569403529f, 0.9582660794f, 0.9595715404f, 0.9608566165f, 0.9621214271f, 0.9633657932f, 0.9645897746f, 0.9657933712f, 0.9669764638f, 0.968139112f, 0.9692812562f, 0.9704028368f, 0.9715039134f, 0.9725843668f, 0.9736442566f, 0.9746835232f, 0.975702107f, 0.9767000675f, 0.9776773453f, 0.9786339402f, 0.9795697927f, 0.9804848433f, 0.9813792109f, 0.982252717f, 0.9831054807f, 0.9839374423f, 0.9847484827f, 0.9855387211f, 0.9863080978f, 0.9870565534f, 0.9877841473f, 0.9884908199f, 0.9891765118f, 0.9898412824f, 0.9904850721f, 0.9911079407f, 0.9917097688f, 0.992290616f, 0.9928504229f, 0.9933891892f, 0.9939069748f, 0.9944036603f, 0.9948793054f, 0.99533391f, 0.9957674146f, 0.9961798191f, 0.9965711236f, 0.996941328f, 0.9972904325f, 0.9976184368f, 0.9979252815f, 0.9982110262f, 0.9984755516f, 0.9987190366f, 0.9989413023f, 0.9991424084f, 0.9993223548f, 0.9994812012f, 0.9996188283f, 0.9997352958f, 0.9998306036f, 0.9999046922f, 0.9999576211f, 0.9999893904f}; -constant float ts10[512] = {-0.0f, -0.01227153838f, -0.02454122901f, -0.03680722415f, -0.04906767607f, -0.061320737f, -0.07356456667f, -0.08579730988f, -0.09801714122f, -0.1102222055f, -0.1224106774f, -0.1345807016f, -0.1467304677f, -0.1588581502f, -0.1709618866f, -0.1830398887f, -0.1950903237f, -0.2071113735f, -0.2191012353f, -0.2310581058f, -0.2429801822f, -0.2548656464f, -0.266712755f, -0.27851969f, -0.2902846634f, -0.3020059466f, -0.3136817515f, 
-0.3253102899f, -0.336889863f, -0.3484186828f, -0.3598950505f, -0.3713172078f, -0.3826834261f, -0.3939920366f, -0.4052413106f, -0.4164295495f, -0.4275550842f, -0.438616246f, -0.449611336f, -0.4605387151f, -0.4713967443f, -0.4821837842f, -0.492898196f, -0.5035383701f, -0.514102757f, -0.5245896578f, -0.534997642f, -0.5453249812f, -0.5555702448f, -0.5657318234f, -0.5758081675f, -0.5857978463f, -0.5956993103f, -0.6055110693f, -0.6152315736f, -0.6248595119f, -0.6343932748f, -0.6438315511f, -0.6531728506f, -0.6624158025f, -0.6715589762f, -0.6806010008f, -0.689540565f, -0.6983762383f, -0.7071067691f, -0.7157308459f, -0.724247098f, -0.7326542735f, -0.7409511209f, -0.7491363883f, -0.7572088242f, -0.7651672363f, -0.7730104327f, -0.7807372212f, -0.7883464098f, -0.7958369255f, -0.8032075167f, -0.81045717f, -0.8175848126f, -0.8245893121f, -0.8314695954f, -0.838224709f, -0.84485358f, -0.851355195f, -0.8577286005f, -0.8639728427f, -0.8700869679f, -0.8760700822f, -0.8819212914f, -0.8876396418f, -0.893224299f, -0.8986744881f, -0.903989315f, -0.909168005f, -0.9142097831f, -0.9191138744f, -0.9238795042f, -0.9285060763f, -0.932992816f, -0.9373390079f, -0.9415440559f, -0.9456073046f, -0.9495281577f, -0.9533060193f, -0.9569403529f, -0.9604305029f, -0.963776052f, -0.9669764638f, -0.9700312614f, -0.9729399681f, -0.975702107f, -0.97831738f, -0.9807852507f, -0.9831054807f, -0.9852776527f, -0.9873014092f, -0.9891765118f, -0.9909026623f, -0.9924795628f, -0.9939069748f, -0.9951847196f, -0.9963126183f, -0.9972904325f, -0.9981181026f, -0.9987954497f, -0.9993223548f, -0.9996988177f, -0.9999247193f, -1.0f, -0.9999247193f, -0.9996988177f, -0.9993223548f, -0.9987954497f, -0.9981181026f, -0.9972904325f, -0.9963126183f, -0.9951847196f, -0.9939069748f, -0.9924795628f, -0.9909026623f, -0.9891765118f, -0.9873014092f, -0.9852776527f, -0.9831054807f, -0.9807852507f, -0.97831738f, -0.975702107f, -0.9729399681f, -0.9700312614f, -0.9669764638f, -0.963776052f, -0.9604305029f, -0.9569403529f, -0.9533060193f, 
-0.9495281577f, -0.9456073046f, -0.9415440559f, -0.9373390079f, -0.932992816f, -0.9285060763f, -0.9238795042f, -0.9191138744f, -0.9142097831f, -0.909168005f, -0.903989315f, -0.8986744881f, -0.893224299f, -0.8876396418f, -0.8819212914f, -0.8760700822f, -0.8700869679f, -0.8639728427f, -0.8577286005f, -0.851355195f, -0.84485358f, -0.838224709f, -0.8314695954f, -0.8245893121f, -0.8175848126f, -0.81045717f, -0.8032075167f, -0.7958369255f, -0.7883464098f, -0.7807372212f, -0.7730104327f, -0.7651672363f, -0.7572088242f, -0.7491363883f, -0.7409511209f, -0.7326542735f, -0.724247098f, -0.7157308459f, -0.7071067691f, -0.6983762383f, -0.689540565f, -0.6806010008f, -0.6715589762f, -0.6624158025f, -0.6531728506f, -0.6438315511f, -0.6343932748f, -0.6248595119f, -0.6152315736f, -0.6055110693f, -0.5956993103f, -0.5857978463f, -0.5758081675f, -0.5657318234f, -0.5555702448f, -0.5453249812f, -0.534997642f, -0.5245896578f, -0.514102757f, -0.5035383701f, -0.492898196f, -0.4821837842f, -0.4713967443f, -0.4605387151f, -0.449611336f, -0.438616246f, -0.4275550842f, -0.4164295495f, -0.4052413106f, -0.3939920366f, -0.3826834261f, -0.3713172078f, -0.3598950505f, -0.3484186828f, -0.336889863f, -0.3253102899f, -0.3136817515f, -0.3020059466f, -0.2902846634f, -0.27851969f, -0.266712755f, -0.2548656464f, -0.2429801822f, -0.2310581058f, -0.2191012353f, -0.2071113735f, -0.1950903237f, -0.1830398887f, -0.1709618866f, -0.1588581502f, -0.1467304677f, -0.1345807016f, -0.1224106774f, -0.1102222055f, -0.09801714122f, -0.08579730988f, -0.07356456667f, -0.061320737f, -0.04906767607f, -0.03680722415f, -0.02454122901f, -0.01227153838f, -0.0f, -0.01227153838f, -0.02454122901f, -0.03680722415f, -0.04906767607f, -0.061320737f, -0.07356456667f, -0.08579730988f, -0.09801714122f, -0.1102222055f, -0.1224106774f, -0.1345807016f, -0.1467304677f, -0.1588581502f, -0.1709618866f, -0.1830398887f, -0.1950903237f, -0.2071113735f, -0.2191012353f, -0.2310581058f, -0.2429801822f, -0.2548656464f, -0.266712755f, -0.27851969f, 
-0.2902846634f, -0.3020059466f, -0.3136817515f, -0.3253102899f, -0.336889863f, -0.3484186828f, -0.3598950505f, -0.3713172078f, -0.3826834261f, -0.3939920366f, -0.4052413106f, -0.4164295495f, -0.4275550842f, -0.438616246f, -0.449611336f, -0.4605387151f, -0.4713967443f, -0.4821837842f, -0.492898196f, -0.5035383701f, -0.514102757f, -0.5245896578f, -0.534997642f, -0.5453249812f, -0.5555702448f, -0.5657318234f, -0.5758081675f, -0.5857978463f, -0.5956993103f, -0.6055110693f, -0.6152315736f, -0.6248595119f, -0.6343932748f, -0.6438315511f, -0.6531728506f, -0.6624158025f, -0.6715589762f, -0.6806010008f, -0.689540565f, -0.6983762383f, -0.7071067691f, -0.7157308459f, -0.724247098f, -0.7326542735f, -0.7409511209f, -0.7491363883f, -0.7572088242f, -0.7651672363f, -0.7730104327f, -0.7807372212f, -0.7883464098f, -0.7958369255f, -0.8032075167f, -0.81045717f, -0.8175848126f, -0.8245893121f, -0.8314695954f, -0.838224709f, -0.84485358f, -0.851355195f, -0.8577286005f, -0.8639728427f, -0.8700869679f, -0.8760700822f, -0.8819212914f, -0.8876396418f, -0.893224299f, -0.8986744881f, -0.903989315f, -0.909168005f, -0.9142097831f, -0.9191138744f, -0.9238795042f, -0.9285060763f, -0.932992816f, -0.9373390079f, -0.9415440559f, -0.9456073046f, -0.9495281577f, -0.9533060193f, -0.9569403529f, -0.9604305029f, -0.963776052f, -0.9669764638f, -0.9700312614f, -0.9729399681f, -0.975702107f, -0.97831738f, -0.9807852507f, -0.9831054807f, -0.9852776527f, -0.9873014092f, -0.9891765118f, -0.9909026623f, -0.9924795628f, -0.9939069748f, -0.9951847196f, -0.9963126183f, -0.9972904325f, -0.9981181026f, -0.9987954497f, -0.9993223548f, -0.9996988177f, -0.9999247193f, -1.0f, -0.9999247193f, -0.9996988177f, -0.9993223548f, -0.9987954497f, -0.9981181026f, -0.9972904325f, -0.9963126183f, -0.9951847196f, -0.9939069748f, -0.9924795628f, -0.9909026623f, -0.9891765118f, -0.9873014092f, -0.9852776527f, -0.9831054807f, -0.9807852507f, -0.97831738f, -0.975702107f, -0.9729399681f, -0.9700312614f, -0.9669764638f, -0.963776052f, 
-0.9604305029f, -0.9569403529f, -0.9533060193f, -0.9495281577f, -0.9456073046f, -0.9415440559f, -0.9373390079f, -0.932992816f, -0.9285060763f, -0.9238795042f, -0.9191138744f, -0.9142097831f, -0.909168005f, -0.903989315f, -0.8986744881f, -0.893224299f, -0.8876396418f, -0.8819212914f, -0.8760700822f, -0.8700869679f, -0.8639728427f, -0.8577286005f, -0.851355195f, -0.84485358f, -0.838224709f, -0.8314695954f, -0.8245893121f, -0.8175848126f, -0.81045717f, -0.8032075167f, -0.7958369255f, -0.7883464098f, -0.7807372212f, -0.7730104327f, -0.7651672363f, -0.7572088242f, -0.7491363883f, -0.7409511209f, -0.7326542735f, -0.724247098f, -0.7157308459f, -0.7071067691f, -0.6983762383f, -0.689540565f, -0.6806010008f, -0.6715589762f, -0.6624158025f, -0.6531728506f, -0.6438315511f, -0.6343932748f, -0.6248595119f, -0.6152315736f, -0.6055110693f, -0.5956993103f, -0.5857978463f, -0.5758081675f, -0.5657318234f, -0.5555702448f, -0.5453249812f, -0.534997642f, -0.5245896578f, -0.514102757f, -0.5035383701f, -0.492898196f, -0.4821837842f, -0.4713967443f, -0.4605387151f, -0.449611336f, -0.438616246f, -0.4275550842f, -0.4164295495f, -0.4052413106f, -0.3939920366f, -0.3826834261f, -0.3713172078f, -0.3598950505f, -0.3484186828f, -0.336889863f, -0.3253102899f, -0.3136817515f, -0.3020059466f, -0.2902846634f, -0.27851969f, -0.266712755f, -0.2548656464f, -0.2429801822f, -0.2310581058f, -0.2191012353f, -0.2071113735f, -0.1950903237f, -0.1830398887f, -0.1709618866f, -0.1588581502f, -0.1467304677f, -0.1345807016f, -0.1224106774f, -0.1102222055f, -0.09801714122f, -0.08579730988f, -0.07356456667f, -0.061320737f, -0.04906767607f, -0.03680722415f, -0.02454122901f, -0.01227153838f}; -constant float ts13[512] = {-0.0f, -0.01227153838f, -0.02454122901f, -0.03680722415f, -0.04906767607f, -0.061320737f, -0.07356456667f, -0.08579730988f, -0.09801714122f, -0.1102222055f, -0.1224106774f, -0.1345807016f, -0.1467304677f, -0.1588581502f, -0.1709618866f, -0.1830398887f, -0.1950903237f, -0.2071113735f, -0.2191012353f, 
-0.2310581058f, -0.2429801822f, -0.2548656464f, -0.266712755f, -0.27851969f, -0.2902846634f, -0.3020059466f, -0.3136817515f, -0.3253102899f, -0.336889863f, -0.3484186828f, -0.3598950505f, -0.3713172078f, -0.3826834261f, -0.3939920366f, -0.4052413106f, -0.4164295495f, -0.4275550842f, -0.438616246f, -0.449611336f, -0.4605387151f, -0.4713967443f, -0.4821837842f, -0.492898196f, -0.5035383701f, -0.514102757f, -0.5245896578f, -0.534997642f, -0.5453249812f, -0.5555702448f, -0.5657318234f, -0.5758081675f, -0.5857978463f, -0.5956993103f, -0.6055110693f, -0.6152315736f, -0.6248595119f, -0.6343932748f, -0.6438315511f, -0.6531728506f, -0.6624158025f, -0.6715589762f, -0.6806010008f, -0.689540565f, -0.6983762383f, -0.7071067691f, -0.7157308459f, -0.724247098f, -0.7326542735f, -0.7409511209f, -0.7491363883f, -0.7572088242f, -0.7651672363f, -0.7730104327f, -0.7807372212f, -0.7883464098f, -0.7958369255f, -0.8032075167f, -0.81045717f, -0.8175848126f, -0.8245893121f, -0.8314695954f, -0.838224709f, -0.84485358f, -0.851355195f, -0.8577286005f, -0.8639728427f, -0.8700869679f, -0.8760700822f, -0.8819212914f, -0.8876396418f, -0.893224299f, -0.8986744881f, -0.903989315f, -0.909168005f, -0.9142097831f, -0.9191138744f, -0.9238795042f, -0.9285060763f, -0.932992816f, -0.9373390079f, -0.9415440559f, -0.9456073046f, -0.9495281577f, -0.9533060193f, -0.9569403529f, -0.9604305029f, -0.963776052f, -0.9669764638f, -0.9700312614f, -0.9729399681f, -0.975702107f, -0.97831738f, -0.9807852507f, -0.9831054807f, -0.9852776527f, -0.9873014092f, -0.9891765118f, -0.9909026623f, -0.9924795628f, -0.9939069748f, -0.9951847196f, -0.9963126183f, -0.9972904325f, -0.9981181026f, -0.9987954497f, -0.9993223548f, -0.9996988177f, -0.9999247193f, -1.0f, -0.9999247193f, -0.9996988177f, -0.9993223548f, -0.9987954497f, -0.9981181026f, -0.9972904325f, -0.9963126183f, -0.9951847196f, -0.9939069748f, -0.9924795628f, -0.9909026623f, -0.9891765118f, -0.9873014092f, -0.9852776527f, -0.9831054807f, -0.9807852507f, -0.97831738f, 
-0.975702107f, -0.9729399681f, -0.9700312614f, -0.9669764638f, -0.963776052f, -0.9604305029f, -0.9569403529f, -0.9533060193f, -0.9495281577f, -0.9456073046f, -0.9415440559f, -0.9373390079f, -0.932992816f, -0.9285060763f, -0.9238795042f, -0.9191138744f, -0.9142097831f, -0.909168005f, -0.903989315f, -0.8986744881f, -0.893224299f, -0.8876396418f, -0.8819212914f, -0.8760700822f, -0.8700869679f, -0.8639728427f, -0.8577286005f, -0.851355195f, -0.84485358f, -0.838224709f, -0.8314695954f, -0.8245893121f, -0.8175848126f, -0.81045717f, -0.8032075167f, -0.7958369255f, -0.7883464098f, -0.7807372212f, -0.7730104327f, -0.7651672363f, -0.7572088242f, -0.7491363883f, -0.7409511209f, -0.7326542735f, -0.724247098f, -0.7157308459f, -0.7071067691f, -0.6983762383f, -0.689540565f, -0.6806010008f, -0.6715589762f, -0.6624158025f, -0.6531728506f, -0.6438315511f, -0.6343932748f, -0.6248595119f, -0.6152315736f, -0.6055110693f, -0.5956993103f, -0.5857978463f, -0.5758081675f, -0.5657318234f, -0.5555702448f, -0.5453249812f, -0.534997642f, -0.5245896578f, -0.514102757f, -0.5035383701f, -0.492898196f, -0.4821837842f, -0.4713967443f, -0.4605387151f, -0.449611336f, -0.438616246f, -0.4275550842f, -0.4164295495f, -0.4052413106f, -0.3939920366f, -0.3826834261f, -0.3713172078f, -0.3598950505f, -0.3484186828f, -0.336889863f, -0.3253102899f, -0.3136817515f, -0.3020059466f, -0.2902846634f, -0.27851969f, -0.266712755f, -0.2548656464f, -0.2429801822f, -0.2310581058f, -0.2191012353f, -0.2071113735f, -0.1950903237f, -0.1830398887f, -0.1709618866f, -0.1588581502f, -0.1467304677f, -0.1345807016f, -0.1224106774f, -0.1102222055f, -0.09801714122f, -0.08579730988f, -0.07356456667f, -0.061320737f, -0.04906767607f, -0.03680722415f, -0.02454122901f, -0.01227153838f, -0.0f, -0.01227153838f, -0.02454122901f, -0.03680722415f, -0.04906767607f, -0.061320737f, -0.07356456667f, -0.08579730988f, -0.09801714122f, -0.1102222055f, -0.1224106774f, -0.1345807016f, -0.1467304677f, -0.1588581502f, -0.1709618866f, -0.1830398887f, 
-0.1950903237f, -0.2071113735f, -0.2191012353f, -0.2310581058f, -0.2429801822f, -0.2548656464f, -0.266712755f, -0.27851969f, -0.2902846634f, -0.3020059466f, -0.3136817515f, -0.3253102899f, -0.336889863f, -0.3484186828f, -0.3598950505f, -0.3713172078f, -0.3826834261f, -0.3939920366f, -0.4052413106f, -0.4164295495f, -0.4275550842f, -0.438616246f, -0.449611336f, -0.4605387151f, -0.4713967443f, -0.4821837842f, -0.492898196f, -0.5035383701f, -0.514102757f, -0.5245896578f, -0.534997642f, -0.5453249812f, -0.5555702448f, -0.5657318234f, -0.5758081675f, -0.5857978463f, -0.5956993103f, -0.6055110693f, -0.6152315736f, -0.6248595119f, -0.6343932748f, -0.6438315511f, -0.6531728506f, -0.6624158025f, -0.6715589762f, -0.6806010008f, -0.689540565f, -0.6983762383f, -0.7071067691f, -0.7157308459f, -0.724247098f, -0.7326542735f, -0.7409511209f, -0.7491363883f, -0.7572088242f, -0.7651672363f, -0.7730104327f, -0.7807372212f, -0.7883464098f, -0.7958369255f, -0.8032075167f, -0.81045717f, -0.8175848126f, -0.8245893121f, -0.8314695954f, -0.838224709f, -0.84485358f, -0.851355195f, -0.8577286005f, -0.8639728427f, -0.8700869679f, -0.8760700822f, -0.8819212914f, -0.8876396418f, -0.893224299f, -0.8986744881f, -0.903989315f, -0.909168005f, -0.9142097831f, -0.9191138744f, -0.9238795042f, -0.9285060763f, -0.932992816f, -0.9373390079f, -0.9415440559f, -0.9456073046f, -0.9495281577f, -0.9533060193f, -0.9569403529f, -0.9604305029f, -0.963776052f, -0.9669764638f, -0.9700312614f, -0.9729399681f, -0.975702107f, -0.97831738f, -0.9807852507f, -0.9831054807f, -0.9852776527f, -0.9873014092f, -0.9891765118f, -0.9909026623f, -0.9924795628f, -0.9939069748f, -0.9951847196f, -0.9963126183f, -0.9972904325f, -0.9981181026f, -0.9987954497f, -0.9993223548f, -0.9996988177f, -0.9999247193f, -1.0f, -0.9999247193f, -0.9996988177f, -0.9993223548f, -0.9987954497f, -0.9981181026f, -0.9972904325f, -0.9963126183f, -0.9951847196f, -0.9939069748f, -0.9924795628f, -0.9909026623f, -0.9891765118f, -0.9873014092f, -0.9852776527f, 
-0.9831054807f, -0.9807852507f, -0.97831738f, -0.975702107f, -0.9729399681f, -0.9700312614f, -0.9669764638f, -0.963776052f, -0.9604305029f, -0.9569403529f, -0.9533060193f, -0.9495281577f, -0.9456073046f, -0.9415440559f, -0.9373390079f, -0.932992816f, -0.9285060763f, -0.9238795042f, -0.9191138744f, -0.9142097831f, -0.909168005f, -0.903989315f, -0.8986744881f, -0.893224299f, -0.8876396418f, -0.8819212914f, -0.8760700822f, -0.8700869679f, -0.8639728427f, -0.8577286005f, -0.851355195f, -0.84485358f, -0.838224709f, -0.8314695954f, -0.8245893121f, -0.8175848126f, -0.81045717f, -0.8032075167f, -0.7958369255f, -0.7883464098f, -0.7807372212f, -0.7730104327f, -0.7651672363f, -0.7572088242f, -0.7491363883f, -0.7409511209f, -0.7326542735f, -0.724247098f, -0.7157308459f, -0.7071067691f, -0.6983762383f, -0.689540565f, -0.6806010008f, -0.6715589762f, -0.6624158025f, -0.6531728506f, -0.6438315511f, -0.6343932748f, -0.6248595119f, -0.6152315736f, -0.6055110693f, -0.5956993103f, -0.5857978463f, -0.5758081675f, -0.5657318234f, -0.5555702448f, -0.5453249812f, -0.534997642f, -0.5245896578f, -0.514102757f, -0.5035383701f, -0.492898196f, -0.4821837842f, -0.4713967443f, -0.4605387151f, -0.449611336f, -0.438616246f, -0.4275550842f, -0.4164295495f, -0.4052413106f, -0.3939920366f, -0.3826834261f, -0.3713172078f, -0.3598950505f, -0.3484186828f, -0.336889863f, -0.3253102899f, -0.3136817515f, -0.3020059466f, -0.2902846634f, -0.27851969f, -0.266712755f, -0.2548656464f, -0.2429801822f, -0.2310581058f, -0.2191012353f, -0.2071113735f, -0.1950903237f, -0.1830398887f, -0.1709618866f, -0.1588581502f, -0.1467304677f, -0.1345807016f, -0.1224106774f, -0.1102222055f, -0.09801714122f, -0.08579730988f, -0.07356456667f, -0.061320737f, -0.04906767607f, -0.03680722415f, -0.02454122901f, -0.01227153838f}; -constant float ts11[512] = {-0.0f, -0.006135884672f, -0.01227153838f, -0.01840673015f, -0.02454122901f, -0.030674804f, -0.03680722415f, -0.0429382585f, -0.04906767607f, -0.05519524589f, -0.061320737f, 
-0.06744392216f, -0.07356456667f, -0.07968243957f, -0.08579730988f, -0.09190895408f, -0.09801714122f, -0.1041216329f, -0.1102222055f, -0.1163186282f, -0.1224106774f, -0.1284981072f, -0.1345807016f, -0.1406582445f, -0.1467304677f, -0.1527971923f, -0.1588581502f, -0.1649131179f, -0.1709618866f, -0.1770042181f, -0.1830398887f, -0.1890686601f, -0.1950903237f, -0.201104641f, -0.2071113735f, -0.2131103128f, -0.2191012353f, -0.2250839174f, -0.2310581058f, -0.2370236069f, -0.2429801822f, -0.2489276081f, -0.2548656464f, -0.2607941031f, -0.266712755f, -0.2726213634f, -0.27851969f, -0.2844075263f, -0.2902846634f, -0.296150893f, -0.3020059466f, -0.3078496456f, -0.3136817515f, -0.3195020258f, -0.3253102899f, -0.3311063051f, -0.336889863f, -0.3426607251f, -0.3484186828f, -0.3541635275f, -0.3598950505f, -0.3656129837f, -0.3713172078f, -0.3770074248f, -0.3826834261f, -0.3883450329f, -0.3939920366f, -0.3996241987f, -0.4052413106f, -0.4108431637f, -0.4164295495f, -0.4220002592f, -0.4275550842f, -0.433093816f, -0.438616246f, -0.4441221356f, -0.449611336f, -0.4550835788f, -0.4605387151f, -0.4659765065f, -0.4713967443f, -0.4767992198f, -0.4821837842f, -0.4875501692f, -0.492898196f, -0.4982276559f, -0.5035383701f, -0.5088301301f, -0.514102757f, -0.5193560123f, -0.5245896578f, -0.5298036337f, -0.534997642f, -0.5401714444f, -0.5453249812f, -0.5504579544f, -0.5555702448f, -0.5606615543f, -0.5657318234f, -0.5707807541f, -0.5758081675f, -0.5808139443f, -0.5857978463f, -0.5907596946f, -0.5956993103f, -0.6006164551f, -0.6055110693f, -0.6103827953f, -0.6152315736f, -0.6200572252f, -0.6248595119f, -0.6296382546f, -0.6343932748f, -0.6391244531f, -0.6438315511f, -0.64851439f, -0.6531728506f, -0.6578066945f, -0.6624158025f, -0.6669999361f, -0.6715589762f, -0.6760926843f, -0.6806010008f, -0.6850836873f, -0.689540565f, -0.6939714551f, -0.6983762383f, -0.7027547359f, -0.7071067691f, -0.7114322186f, -0.7157308459f, -0.720002532f, -0.724247098f, -0.728464365f, -0.7326542735f, -0.7368165851f, 
-0.7409511209f, -0.7450577617f, -0.7491363883f, -0.7531868219f, -0.7572088242f, -0.761202395f, -0.7651672363f, -0.7691033483f, -0.7730104327f, -0.7768884897f, -0.7807372212f, -0.7845565677f, -0.7883464098f, -0.7921065688f, -0.7958369255f, -0.7995372415f, -0.8032075167f, -0.8068475723f, -0.81045717f, -0.8140363097f, -0.8175848126f, -0.8211025f, -0.8245893121f, -0.8280450702f, -0.8314695954f, -0.8348628879f, -0.838224709f, -0.8415549994f, -0.84485358f, -0.8481203318f, -0.851355195f, -0.854557991f, -0.8577286005f, -0.8608669639f, -0.8639728427f, -0.867046237f, -0.8700869679f, -0.8730949759f, -0.8760700822f, -0.8790122271f, -0.8819212914f, -0.8847970963f, -0.8876396418f, -0.8904487491f, -0.893224299f, -0.8959662318f, -0.8986744881f, -0.9013488293f, -0.903989315f, -0.9065957069f, -0.909168005f, -0.9117060304f, -0.9142097831f, -0.9166790843f, -0.9191138744f, -0.9215140343f, -0.9238795042f, -0.9262102246f, -0.9285060763f, -0.9307669401f, -0.932992816f, -0.9351835251f, -0.9373390079f, -0.9394592047f, -0.9415440559f, -0.9435934424f, -0.9456073046f, -0.9475855827f, -0.9495281577f, -0.9514350295f, -0.9533060193f, -0.9551411867f, -0.9569403529f, -0.9587034583f, -0.9604305029f, -0.9621214271f, -0.963776052f, -0.9653944373f, -0.9669764638f, -0.9685220718f, -0.9700312614f, -0.9715039134f, -0.9729399681f, -0.974339366f, -0.975702107f, -0.9770281315f, -0.97831738f, -0.9795697927f, -0.9807852507f, -0.9819638729f, -0.9831054807f, -0.9842100739f, -0.9852776527f, -0.9863080978f, -0.9873014092f, -0.988257587f, -0.9891765118f, -0.9900581837f, -0.9909026623f, -0.9917097688f, -0.9924795628f, -0.993211925f, -0.9939069748f, -0.9945645928f, -0.9951847196f, -0.9957674146f, -0.9963126183f, -0.996820271f, -0.9972904325f, -0.997723043f, -0.9981181026f, -0.9984755516f, -0.9987954497f, -0.9990777373f, -0.9993223548f, -0.9995294213f, -0.9996988177f, -0.9998306036f, -0.9999247193f, -0.9999811649f, -0.0f, -0.006135884672f, -0.01227153838f, -0.01840673015f, -0.02454122901f, -0.030674804f, 
-0.03680722415f, -0.0429382585f, -0.04906767607f, -0.05519524589f, -0.061320737f, -0.06744392216f, -0.07356456667f, -0.07968243957f, -0.08579730988f, -0.09190895408f, -0.09801714122f, -0.1041216329f, -0.1102222055f, -0.1163186282f, -0.1224106774f, -0.1284981072f, -0.1345807016f, -0.1406582445f, -0.1467304677f, -0.1527971923f, -0.1588581502f, -0.1649131179f, -0.1709618866f, -0.1770042181f, -0.1830398887f, -0.1890686601f, -0.1950903237f, -0.201104641f, -0.2071113735f, -0.2131103128f, -0.2191012353f, -0.2250839174f, -0.2310581058f, -0.2370236069f, -0.2429801822f, -0.2489276081f, -0.2548656464f, -0.2607941031f, -0.266712755f, -0.2726213634f, -0.27851969f, -0.2844075263f, -0.2902846634f, -0.296150893f, -0.3020059466f, -0.3078496456f, -0.3136817515f, -0.3195020258f, -0.3253102899f, -0.3311063051f, -0.336889863f, -0.3426607251f, -0.3484186828f, -0.3541635275f, -0.3598950505f, -0.3656129837f, -0.3713172078f, -0.3770074248f, -0.3826834261f, -0.3883450329f, -0.3939920366f, -0.3996241987f, -0.4052413106f, -0.4108431637f, -0.4164295495f, -0.4220002592f, -0.4275550842f, -0.433093816f, -0.438616246f, -0.4441221356f, -0.449611336f, -0.4550835788f, -0.4605387151f, -0.4659765065f, -0.4713967443f, -0.4767992198f, -0.4821837842f, -0.4875501692f, -0.492898196f, -0.4982276559f, -0.5035383701f, -0.5088301301f, -0.514102757f, -0.5193560123f, -0.5245896578f, -0.5298036337f, -0.534997642f, -0.5401714444f, -0.5453249812f, -0.5504579544f, -0.5555702448f, -0.5606615543f, -0.5657318234f, -0.5707807541f, -0.5758081675f, -0.5808139443f, -0.5857978463f, -0.5907596946f, -0.5956993103f, -0.6006164551f, -0.6055110693f, -0.6103827953f, -0.6152315736f, -0.6200572252f, -0.6248595119f, -0.6296382546f, -0.6343932748f, -0.6391244531f, -0.6438315511f, -0.64851439f, -0.6531728506f, -0.6578066945f, -0.6624158025f, -0.6669999361f, -0.6715589762f, -0.6760926843f, -0.6806010008f, -0.6850836873f, -0.689540565f, -0.6939714551f, -0.6983762383f, -0.7027547359f, -0.7071067691f, -0.7114322186f, -0.7157308459f, 
-0.720002532f, -0.724247098f, -0.728464365f, -0.7326542735f, -0.7368165851f, -0.7409511209f, -0.7450577617f, -0.7491363883f, -0.7531868219f, -0.7572088242f, -0.761202395f, -0.7651672363f, -0.7691033483f, -0.7730104327f, -0.7768884897f, -0.7807372212f, -0.7845565677f, -0.7883464098f, -0.7921065688f, -0.7958369255f, -0.7995372415f, -0.8032075167f, -0.8068475723f, -0.81045717f, -0.8140363097f, -0.8175848126f, -0.8211025f, -0.8245893121f, -0.8280450702f, -0.8314695954f, -0.8348628879f, -0.838224709f, -0.8415549994f, -0.84485358f, -0.8481203318f, -0.851355195f, -0.854557991f, -0.8577286005f, -0.8608669639f, -0.8639728427f, -0.867046237f, -0.8700869679f, -0.8730949759f, -0.8760700822f, -0.8790122271f, -0.8819212914f, -0.8847970963f, -0.8876396418f, -0.8904487491f, -0.893224299f, -0.8959662318f, -0.8986744881f, -0.9013488293f, -0.903989315f, -0.9065957069f, -0.909168005f, -0.9117060304f, -0.9142097831f, -0.9166790843f, -0.9191138744f, -0.9215140343f, -0.9238795042f, -0.9262102246f, -0.9285060763f, -0.9307669401f, -0.932992816f, -0.9351835251f, -0.9373390079f, -0.9394592047f, -0.9415440559f, -0.9435934424f, -0.9456073046f, -0.9475855827f, -0.9495281577f, -0.9514350295f, -0.9533060193f, -0.9551411867f, -0.9569403529f, -0.9587034583f, -0.9604305029f, -0.9621214271f, -0.963776052f, -0.9653944373f, -0.9669764638f, -0.9685220718f, -0.9700312614f, -0.9715039134f, -0.9729399681f, -0.974339366f, -0.975702107f, -0.9770281315f, -0.97831738f, -0.9795697927f, -0.9807852507f, -0.9819638729f, -0.9831054807f, -0.9842100739f, -0.9852776527f, -0.9863080978f, -0.9873014092f, -0.988257587f, -0.9891765118f, -0.9900581837f, -0.9909026623f, -0.9917097688f, -0.9924795628f, -0.993211925f, -0.9939069748f, -0.9945645928f, -0.9951847196f, -0.9957674146f, -0.9963126183f, -0.996820271f, -0.9972904325f, -0.997723043f, -0.9981181026f, -0.9984755516f, -0.9987954497f, -0.9990777373f, -0.9993223548f, -0.9995294213f, -0.9996988177f, -0.9998306036f, -0.9999247193f, -0.9999811649f}; -constant float ts14[512] 
= {-0.0f, -0.006135884672f, -0.01227153838f, -0.01840673015f, -0.02454122901f, -0.030674804f, -0.03680722415f, -0.0429382585f, -0.04906767607f, -0.05519524589f, -0.061320737f, -0.06744392216f, -0.07356456667f, -0.07968243957f, -0.08579730988f, -0.09190895408f, -0.09801714122f, -0.1041216329f, -0.1102222055f, -0.1163186282f, -0.1224106774f, -0.1284981072f, -0.1345807016f, -0.1406582445f, -0.1467304677f, -0.1527971923f, -0.1588581502f, -0.1649131179f, -0.1709618866f, -0.1770042181f, -0.1830398887f, -0.1890686601f, -0.1950903237f, -0.201104641f, -0.2071113735f, -0.2131103128f, -0.2191012353f, -0.2250839174f, -0.2310581058f, -0.2370236069f, -0.2429801822f, -0.2489276081f, -0.2548656464f, -0.2607941031f, -0.266712755f, -0.2726213634f, -0.27851969f, -0.2844075263f, -0.2902846634f, -0.296150893f, -0.3020059466f, -0.3078496456f, -0.3136817515f, -0.3195020258f, -0.3253102899f, -0.3311063051f, -0.336889863f, -0.3426607251f, -0.3484186828f, -0.3541635275f, -0.3598950505f, -0.3656129837f, -0.3713172078f, -0.3770074248f, -0.3826834261f, -0.3883450329f, -0.3939920366f, -0.3996241987f, -0.4052413106f, -0.4108431637f, -0.4164295495f, -0.4220002592f, -0.4275550842f, -0.433093816f, -0.438616246f, -0.4441221356f, -0.449611336f, -0.4550835788f, -0.4605387151f, -0.4659765065f, -0.4713967443f, -0.4767992198f, -0.4821837842f, -0.4875501692f, -0.492898196f, -0.4982276559f, -0.5035383701f, -0.5088301301f, -0.514102757f, -0.5193560123f, -0.5245896578f, -0.5298036337f, -0.534997642f, -0.5401714444f, -0.5453249812f, -0.5504579544f, -0.5555702448f, -0.5606615543f, -0.5657318234f, -0.5707807541f, -0.5758081675f, -0.5808139443f, -0.5857978463f, -0.5907596946f, -0.5956993103f, -0.6006164551f, -0.6055110693f, -0.6103827953f, -0.6152315736f, -0.6200572252f, -0.6248595119f, -0.6296382546f, -0.6343932748f, -0.6391244531f, -0.6438315511f, -0.64851439f, -0.6531728506f, -0.6578066945f, -0.6624158025f, -0.6669999361f, -0.6715589762f, -0.6760926843f, -0.6806010008f, -0.6850836873f, -0.689540565f, 
-0.6939714551f, -0.6983762383f, -0.7027547359f, -0.7071067691f, -0.7114322186f, -0.7157308459f, -0.720002532f, -0.724247098f, -0.728464365f, -0.7326542735f, -0.7368165851f, -0.7409511209f, -0.7450577617f, -0.7491363883f, -0.7531868219f, -0.7572088242f, -0.761202395f, -0.7651672363f, -0.7691033483f, -0.7730104327f, -0.7768884897f, -0.7807372212f, -0.7845565677f, -0.7883464098f, -0.7921065688f, -0.7958369255f, -0.7995372415f, -0.8032075167f, -0.8068475723f, -0.81045717f, -0.8140363097f, -0.8175848126f, -0.8211025f, -0.8245893121f, -0.8280450702f, -0.8314695954f, -0.8348628879f, -0.838224709f, -0.8415549994f, -0.84485358f, -0.8481203318f, -0.851355195f, -0.854557991f, -0.8577286005f, -0.8608669639f, -0.8639728427f, -0.867046237f, -0.8700869679f, -0.8730949759f, -0.8760700822f, -0.8790122271f, -0.8819212914f, -0.8847970963f, -0.8876396418f, -0.8904487491f, -0.893224299f, -0.8959662318f, -0.8986744881f, -0.9013488293f, -0.903989315f, -0.9065957069f, -0.909168005f, -0.9117060304f, -0.9142097831f, -0.9166790843f, -0.9191138744f, -0.9215140343f, -0.9238795042f, -0.9262102246f, -0.9285060763f, -0.9307669401f, -0.932992816f, -0.9351835251f, -0.9373390079f, -0.9394592047f, -0.9415440559f, -0.9435934424f, -0.9456073046f, -0.9475855827f, -0.9495281577f, -0.9514350295f, -0.9533060193f, -0.9551411867f, -0.9569403529f, -0.9587034583f, -0.9604305029f, -0.9621214271f, -0.963776052f, -0.9653944373f, -0.9669764638f, -0.9685220718f, -0.9700312614f, -0.9715039134f, -0.9729399681f, -0.974339366f, -0.975702107f, -0.9770281315f, -0.97831738f, -0.9795697927f, -0.9807852507f, -0.9819638729f, -0.9831054807f, -0.9842100739f, -0.9852776527f, -0.9863080978f, -0.9873014092f, -0.988257587f, -0.9891765118f, -0.9900581837f, -0.9909026623f, -0.9917097688f, -0.9924795628f, -0.993211925f, -0.9939069748f, -0.9945645928f, -0.9951847196f, -0.9957674146f, -0.9963126183f, -0.996820271f, -0.9972904325f, -0.997723043f, -0.9981181026f, -0.9984755516f, -0.9987954497f, -0.9990777373f, -0.9993223548f, 
-0.9995294213f, -0.9996988177f, -0.9998306036f, -0.9999247193f, -0.9999811649f, -0.0f, -0.006135884672f, -0.01227153838f, -0.01840673015f, -0.02454122901f, -0.030674804f, -0.03680722415f, -0.0429382585f, -0.04906767607f, -0.05519524589f, -0.061320737f, -0.06744392216f, -0.07356456667f, -0.07968243957f, -0.08579730988f, -0.09190895408f, -0.09801714122f, -0.1041216329f, -0.1102222055f, -0.1163186282f, -0.1224106774f, -0.1284981072f, -0.1345807016f, -0.1406582445f, -0.1467304677f, -0.1527971923f, -0.1588581502f, -0.1649131179f, -0.1709618866f, -0.1770042181f, -0.1830398887f, -0.1890686601f, -0.1950903237f, -0.201104641f, -0.2071113735f, -0.2131103128f, -0.2191012353f, -0.2250839174f, -0.2310581058f, -0.2370236069f, -0.2429801822f, -0.2489276081f, -0.2548656464f, -0.2607941031f, -0.266712755f, -0.2726213634f, -0.27851969f, -0.2844075263f, -0.2902846634f, -0.296150893f, -0.3020059466f, -0.3078496456f, -0.3136817515f, -0.3195020258f, -0.3253102899f, -0.3311063051f, -0.336889863f, -0.3426607251f, -0.3484186828f, -0.3541635275f, -0.3598950505f, -0.3656129837f, -0.3713172078f, -0.3770074248f, -0.3826834261f, -0.3883450329f, -0.3939920366f, -0.3996241987f, -0.4052413106f, -0.4108431637f, -0.4164295495f, -0.4220002592f, -0.4275550842f, -0.433093816f, -0.438616246f, -0.4441221356f, -0.449611336f, -0.4550835788f, -0.4605387151f, -0.4659765065f, -0.4713967443f, -0.4767992198f, -0.4821837842f, -0.4875501692f, -0.492898196f, -0.4982276559f, -0.5035383701f, -0.5088301301f, -0.514102757f, -0.5193560123f, -0.5245896578f, -0.5298036337f, -0.534997642f, -0.5401714444f, -0.5453249812f, -0.5504579544f, -0.5555702448f, -0.5606615543f, -0.5657318234f, -0.5707807541f, -0.5758081675f, -0.5808139443f, -0.5857978463f, -0.5907596946f, -0.5956993103f, -0.6006164551f, -0.6055110693f, -0.6103827953f, -0.6152315736f, -0.6200572252f, -0.6248595119f, -0.6296382546f, -0.6343932748f, -0.6391244531f, -0.6438315511f, -0.64851439f, -0.6531728506f, -0.6578066945f, -0.6624158025f, -0.6669999361f, 
-0.6715589762f, -0.6760926843f, -0.6806010008f, -0.6850836873f, -0.689540565f, -0.6939714551f, -0.6983762383f, -0.7027547359f, -0.7071067691f, -0.7114322186f, -0.7157308459f, -0.720002532f, -0.724247098f, -0.728464365f, -0.7326542735f, -0.7368165851f, -0.7409511209f, -0.7450577617f, -0.7491363883f, -0.7531868219f, -0.7572088242f, -0.761202395f, -0.7651672363f, -0.7691033483f, -0.7730104327f, -0.7768884897f, -0.7807372212f, -0.7845565677f, -0.7883464098f, -0.7921065688f, -0.7958369255f, -0.7995372415f, -0.8032075167f, -0.8068475723f, -0.81045717f, -0.8140363097f, -0.8175848126f, -0.8211025f, -0.8245893121f, -0.8280450702f, -0.8314695954f, -0.8348628879f, -0.838224709f, -0.8415549994f, -0.84485358f, -0.8481203318f, -0.851355195f, -0.854557991f, -0.8577286005f, -0.8608669639f, -0.8639728427f, -0.867046237f, -0.8700869679f, -0.8730949759f, -0.8760700822f, -0.8790122271f, -0.8819212914f, -0.8847970963f, -0.8876396418f, -0.8904487491f, -0.893224299f, -0.8959662318f, -0.8986744881f, -0.9013488293f, -0.903989315f, -0.9065957069f, -0.909168005f, -0.9117060304f, -0.9142097831f, -0.9166790843f, -0.9191138744f, -0.9215140343f, -0.9238795042f, -0.9262102246f, -0.9285060763f, -0.9307669401f, -0.932992816f, -0.9351835251f, -0.9373390079f, -0.9394592047f, -0.9415440559f, -0.9435934424f, -0.9456073046f, -0.9475855827f, -0.9495281577f, -0.9514350295f, -0.9533060193f, -0.9551411867f, -0.9569403529f, -0.9587034583f, -0.9604305029f, -0.9621214271f, -0.963776052f, -0.9653944373f, -0.9669764638f, -0.9685220718f, -0.9700312614f, -0.9715039134f, -0.9729399681f, -0.974339366f, -0.975702107f, -0.9770281315f, -0.97831738f, -0.9795697927f, -0.9807852507f, -0.9819638729f, -0.9831054807f, -0.9842100739f, -0.9852776527f, -0.9863080978f, -0.9873014092f, -0.988257587f, -0.9891765118f, -0.9900581837f, -0.9909026623f, -0.9917097688f, -0.9924795628f, -0.993211925f, -0.9939069748f, -0.9945645928f, -0.9951847196f, -0.9957674146f, -0.9963126183f, -0.996820271f, -0.9972904325f, -0.997723043f, 
-0.9981181026f, -0.9984755516f, -0.9987954497f, -0.9990777373f, -0.9993223548f, -0.9995294213f, -0.9996988177f, -0.9998306036f, -0.9999247193f, -0.9999811649f}; -constant float ts12[512] = {-0.0f, -0.01840673015f, -0.03680722415f, -0.05519524589f, -0.07356456667f, -0.09190895408f, -0.1102222055f, -0.1284981072f, -0.1467304677f, -0.1649131179f, -0.1830398887f, -0.201104641f, -0.2191012353f, -0.2370236069f, -0.2548656464f, -0.2726213634f, -0.2902846634f, -0.3078496456f, -0.3253102899f, -0.3426607251f, -0.3598950505f, -0.3770074248f, -0.3939920366f, -0.4108431637f, -0.4275550842f, -0.4441221356f, -0.4605387151f, -0.4767992198f, -0.492898196f, -0.5088301301f, -0.5245896578f, -0.5401714444f, -0.5555702448f, -0.5707807541f, -0.5857978463f, -0.6006164551f, -0.6152315736f, -0.6296382546f, -0.6438315511f, -0.6578066945f, -0.6715589762f, -0.6850836873f, -0.6983762383f, -0.7114322186f, -0.724247098f, -0.7368165851f, -0.7491363883f, -0.761202395f, -0.7730104327f, -0.7845565677f, -0.7958369255f, -0.8068475723f, -0.8175848126f, -0.8280450702f, -0.838224709f, -0.8481203318f, -0.8577286005f, -0.867046237f, -0.8760700822f, -0.8847970963f, -0.893224299f, -0.9013488293f, -0.909168005f, -0.9166790843f, -0.9238795042f, -0.9307669401f, -0.9373390079f, -0.9435934424f, -0.9495281577f, -0.9551411867f, -0.9604305029f, -0.9653944373f, -0.9700312614f, -0.974339366f, -0.97831738f, -0.9819638729f, -0.9852776527f, -0.988257587f, -0.9909026623f, -0.993211925f, -0.9951847196f, -0.996820271f, -0.9981181026f, -0.9990777373f, -0.9996988177f, -0.9999811649f, -0.9999247193f, -0.9995294213f, -0.9987954497f, -0.997723043f, -0.9963126183f, -0.9945645928f, -0.9924795628f, -0.9900581837f, -0.9873014092f, -0.9842100739f, -0.9807852507f, -0.9770281315f, -0.9729399681f, -0.9685220718f, -0.963776052f, -0.9587034583f, -0.9533060193f, -0.9475855827f, -0.9415440559f, -0.9351835251f, -0.9285060763f, -0.9215140343f, -0.9142097831f, -0.9065957069f, -0.8986744881f, -0.8904487491f, -0.8819212914f, -0.8730949759f, 
-0.8639728427f, -0.854557991f, -0.84485358f, -0.8348628879f, -0.8245893121f, -0.8140363097f, -0.8032075167f, -0.7921065688f, -0.7807372212f, -0.7691033483f, -0.7572088242f, -0.7450577617f, -0.7326542735f, -0.720002532f, -0.7071067691f, -0.6939714551f, -0.6806010008f, -0.6669999361f, -0.6531728506f, -0.6391244531f, -0.6248595119f, -0.6103827953f, -0.5956993103f, -0.5808139443f, -0.5657318234f, -0.5504579544f, -0.534997642f, -0.5193560123f, -0.5035383701f, -0.4875501692f, -0.4713967443f, -0.4550835788f, -0.438616246f, -0.4220002592f, -0.4052413106f, -0.3883450329f, -0.3713172078f, -0.3541635275f, -0.336889863f, -0.3195020258f, -0.3020059466f, -0.2844075263f, -0.266712755f, -0.2489276081f, -0.2310581058f, -0.2131103128f, -0.1950903237f, -0.1770042181f, -0.1588581502f, -0.1406582445f, -0.1224106774f, -0.1041216329f, -0.08579730988f, -0.06744392216f, -0.04906767607f, -0.030674804f, -0.01227153838f, 0.006135884672f, 0.02454122901f, 0.0429382585f, 0.061320737f, 0.07968243957f, 0.09801714122f, 0.1163186282f, 0.1345807016f, 0.1527971923f, 0.1709618866f, 0.1890686601f, 0.2071113735f, 0.2250839174f, 0.2429801822f, 0.2607941031f, 0.27851969f, 0.296150893f, 0.3136817515f, 0.3311063051f, 0.3484186828f, 0.3656129837f, 0.3826834261f, 0.3996241987f, 0.4164295495f, 0.433093816f, 0.449611336f, 0.4659765065f, 0.4821837842f, 0.4982276559f, 0.514102757f, 0.5298036337f, 0.5453249812f, 0.5606615543f, 0.5758081675f, 0.5907596946f, 0.6055110693f, 0.6200572252f, 0.6343932748f, 0.64851439f, 0.6624158025f, 0.6760926843f, 0.689540565f, 0.7027547359f, 0.7157308459f, 0.728464365f, 0.7409511209f, 0.7531868219f, 0.7651672363f, 0.7768884897f, 0.7883464098f, 0.7995372415f, 0.81045717f, 0.8211025f, 0.8314695954f, 0.8415549994f, 0.851355195f, 0.8608669639f, 0.8700869679f, 0.8790122271f, 0.8876396418f, 0.8959662318f, 0.903989315f, 0.9117060304f, 0.9191138744f, 0.9262102246f, 0.932992816f, 0.9394592047f, 0.9456073046f, 0.9514350295f, 0.9569403529f, 0.9621214271f, 0.9669764638f, 0.9715039134f, 
0.975702107f, 0.9795697927f, 0.9831054807f, 0.9863080978f, 0.9891765118f, 0.9917097688f, 0.9939069748f, 0.9957674146f, 0.9972904325f, 0.9984755516f, 0.9993223548f, 0.9998306036f, -0.0f, -0.01840673015f, -0.03680722415f, -0.05519524589f, -0.07356456667f, -0.09190895408f, -0.1102222055f, -0.1284981072f, -0.1467304677f, -0.1649131179f, -0.1830398887f, -0.201104641f, -0.2191012353f, -0.2370236069f, -0.2548656464f, -0.2726213634f, -0.2902846634f, -0.3078496456f, -0.3253102899f, -0.3426607251f, -0.3598950505f, -0.3770074248f, -0.3939920366f, -0.4108431637f, -0.4275550842f, -0.4441221356f, -0.4605387151f, -0.4767992198f, -0.492898196f, -0.5088301301f, -0.5245896578f, -0.5401714444f, -0.5555702448f, -0.5707807541f, -0.5857978463f, -0.6006164551f, -0.6152315736f, -0.6296382546f, -0.6438315511f, -0.6578066945f, -0.6715589762f, -0.6850836873f, -0.6983762383f, -0.7114322186f, -0.724247098f, -0.7368165851f, -0.7491363883f, -0.761202395f, -0.7730104327f, -0.7845565677f, -0.7958369255f, -0.8068475723f, -0.8175848126f, -0.8280450702f, -0.838224709f, -0.8481203318f, -0.8577286005f, -0.867046237f, -0.8760700822f, -0.8847970963f, -0.893224299f, -0.9013488293f, -0.909168005f, -0.9166790843f, -0.9238795042f, -0.9307669401f, -0.9373390079f, -0.9435934424f, -0.9495281577f, -0.9551411867f, -0.9604305029f, -0.9653944373f, -0.9700312614f, -0.974339366f, -0.97831738f, -0.9819638729f, -0.9852776527f, -0.988257587f, -0.9909026623f, -0.993211925f, -0.9951847196f, -0.996820271f, -0.9981181026f, -0.9990777373f, -0.9996988177f, -0.9999811649f, -0.9999247193f, -0.9995294213f, -0.9987954497f, -0.997723043f, -0.9963126183f, -0.9945645928f, -0.9924795628f, -0.9900581837f, -0.9873014092f, -0.9842100739f, -0.9807852507f, -0.9770281315f, -0.9729399681f, -0.9685220718f, -0.963776052f, -0.9587034583f, -0.9533060193f, -0.9475855827f, -0.9415440559f, -0.9351835251f, -0.9285060763f, -0.9215140343f, -0.9142097831f, -0.9065957069f, -0.8986744881f, -0.8904487491f, -0.8819212914f, -0.8730949759f, -0.8639728427f, 
-0.854557991f, -0.84485358f, -0.8348628879f, -0.8245893121f, -0.8140363097f, -0.8032075167f, -0.7921065688f, -0.7807372212f, -0.7691033483f, -0.7572088242f, -0.7450577617f, -0.7326542735f, -0.720002532f, -0.7071067691f, -0.6939714551f, -0.6806010008f, -0.6669999361f, -0.6531728506f, -0.6391244531f, -0.6248595119f, -0.6103827953f, -0.5956993103f, -0.5808139443f, -0.5657318234f, -0.5504579544f, -0.534997642f, -0.5193560123f, -0.5035383701f, -0.4875501692f, -0.4713967443f, -0.4550835788f, -0.438616246f, -0.4220002592f, -0.4052413106f, -0.3883450329f, -0.3713172078f, -0.3541635275f, -0.336889863f, -0.3195020258f, -0.3020059466f, -0.2844075263f, -0.266712755f, -0.2489276081f, -0.2310581058f, -0.2131103128f, -0.1950903237f, -0.1770042181f, -0.1588581502f, -0.1406582445f, -0.1224106774f, -0.1041216329f, -0.08579730988f, -0.06744392216f, -0.04906767607f, -0.030674804f, -0.01227153838f, 0.006135884672f, 0.02454122901f, 0.0429382585f, 0.061320737f, 0.07968243957f, 0.09801714122f, 0.1163186282f, 0.1345807016f, 0.1527971923f, 0.1709618866f, 0.1890686601f, 0.2071113735f, 0.2250839174f, 0.2429801822f, 0.2607941031f, 0.27851969f, 0.296150893f, 0.3136817515f, 0.3311063051f, 0.3484186828f, 0.3656129837f, 0.3826834261f, 0.3996241987f, 0.4164295495f, 0.433093816f, 0.449611336f, 0.4659765065f, 0.4821837842f, 0.4982276559f, 0.514102757f, 0.5298036337f, 0.5453249812f, 0.5606615543f, 0.5758081675f, 0.5907596946f, 0.6055110693f, 0.6200572252f, 0.6343932748f, 0.64851439f, 0.6624158025f, 0.6760926843f, 0.689540565f, 0.7027547359f, 0.7157308459f, 0.728464365f, 0.7409511209f, 0.7531868219f, 0.7651672363f, 0.7768884897f, 0.7883464098f, 0.7995372415f, 0.81045717f, 0.8211025f, 0.8314695954f, 0.8415549994f, 0.851355195f, 0.8608669639f, 0.8700869679f, 0.8790122271f, 0.8876396418f, 0.8959662318f, 0.903989315f, 0.9117060304f, 0.9191138744f, 0.9262102246f, 0.932992816f, 0.9394592047f, 0.9456073046f, 0.9514350295f, 0.9569403529f, 0.9621214271f, 0.9669764638f, 0.9715039134f, 0.975702107f, 
0.9795697927f, 0.9831054807f, 0.9863080978f, 0.9891765118f, 0.9917097688f, 0.9939069748f, 0.9957674146f, 0.9972904325f, 0.9984755516f, 0.9993223548f, 0.9998306036f}; -constant float ts15[512] = {-0.0f, -0.01840673015f, -0.03680722415f, -0.05519524589f, -0.07356456667f, -0.09190895408f, -0.1102222055f, -0.1284981072f, -0.1467304677f, -0.1649131179f, -0.1830398887f, -0.201104641f, -0.2191012353f, -0.2370236069f, -0.2548656464f, -0.2726213634f, -0.2902846634f, -0.3078496456f, -0.3253102899f, -0.3426607251f, -0.3598950505f, -0.3770074248f, -0.3939920366f, -0.4108431637f, -0.4275550842f, -0.4441221356f, -0.4605387151f, -0.4767992198f, -0.492898196f, -0.5088301301f, -0.5245896578f, -0.5401714444f, -0.5555702448f, -0.5707807541f, -0.5857978463f, -0.6006164551f, -0.6152315736f, -0.6296382546f, -0.6438315511f, -0.6578066945f, -0.6715589762f, -0.6850836873f, -0.6983762383f, -0.7114322186f, -0.724247098f, -0.7368165851f, -0.7491363883f, -0.761202395f, -0.7730104327f, -0.7845565677f, -0.7958369255f, -0.8068475723f, -0.8175848126f, -0.8280450702f, -0.838224709f, -0.8481203318f, -0.8577286005f, -0.867046237f, -0.8760700822f, -0.8847970963f, -0.893224299f, -0.9013488293f, -0.909168005f, -0.9166790843f, -0.9238795042f, -0.9307669401f, -0.9373390079f, -0.9435934424f, -0.9495281577f, -0.9551411867f, -0.9604305029f, -0.9653944373f, -0.9700312614f, -0.974339366f, -0.97831738f, -0.9819638729f, -0.9852776527f, -0.988257587f, -0.9909026623f, -0.993211925f, -0.9951847196f, -0.996820271f, -0.9981181026f, -0.9990777373f, -0.9996988177f, -0.9999811649f, -0.9999247193f, -0.9995294213f, -0.9987954497f, -0.997723043f, -0.9963126183f, -0.9945645928f, -0.9924795628f, -0.9900581837f, -0.9873014092f, -0.9842100739f, -0.9807852507f, -0.9770281315f, -0.9729399681f, -0.9685220718f, -0.963776052f, -0.9587034583f, -0.9533060193f, -0.9475855827f, -0.9415440559f, -0.9351835251f, -0.9285060763f, -0.9215140343f, -0.9142097831f, -0.9065957069f, -0.8986744881f, -0.8904487491f, -0.8819212914f, -0.8730949759f, 
-0.8639728427f, -0.854557991f, -0.84485358f, -0.8348628879f, -0.8245893121f, -0.8140363097f, -0.8032075167f, -0.7921065688f, -0.7807372212f, -0.7691033483f, -0.7572088242f, -0.7450577617f, -0.7326542735f, -0.720002532f, -0.7071067691f, -0.6939714551f, -0.6806010008f, -0.6669999361f, -0.6531728506f, -0.6391244531f, -0.6248595119f, -0.6103827953f, -0.5956993103f, -0.5808139443f, -0.5657318234f, -0.5504579544f, -0.534997642f, -0.5193560123f, -0.5035383701f, -0.4875501692f, -0.4713967443f, -0.4550835788f, -0.438616246f, -0.4220002592f, -0.4052413106f, -0.3883450329f, -0.3713172078f, -0.3541635275f, -0.336889863f, -0.3195020258f, -0.3020059466f, -0.2844075263f, -0.266712755f, -0.2489276081f, -0.2310581058f, -0.2131103128f, -0.1950903237f, -0.1770042181f, -0.1588581502f, -0.1406582445f, -0.1224106774f, -0.1041216329f, -0.08579730988f, -0.06744392216f, -0.04906767607f, -0.030674804f, -0.01227153838f, 0.006135884672f, 0.02454122901f, 0.0429382585f, 0.061320737f, 0.07968243957f, 0.09801714122f, 0.1163186282f, 0.1345807016f, 0.1527971923f, 0.1709618866f, 0.1890686601f, 0.2071113735f, 0.2250839174f, 0.2429801822f, 0.2607941031f, 0.27851969f, 0.296150893f, 0.3136817515f, 0.3311063051f, 0.3484186828f, 0.3656129837f, 0.3826834261f, 0.3996241987f, 0.4164295495f, 0.433093816f, 0.449611336f, 0.4659765065f, 0.4821837842f, 0.4982276559f, 0.514102757f, 0.5298036337f, 0.5453249812f, 0.5606615543f, 0.5758081675f, 0.5907596946f, 0.6055110693f, 0.6200572252f, 0.6343932748f, 0.64851439f, 0.6624158025f, 0.6760926843f, 0.689540565f, 0.7027547359f, 0.7157308459f, 0.728464365f, 0.7409511209f, 0.7531868219f, 0.7651672363f, 0.7768884897f, 0.7883464098f, 0.7995372415f, 0.81045717f, 0.8211025f, 0.8314695954f, 0.8415549994f, 0.851355195f, 0.8608669639f, 0.8700869679f, 0.8790122271f, 0.8876396418f, 0.8959662318f, 0.903989315f, 0.9117060304f, 0.9191138744f, 0.9262102246f, 0.932992816f, 0.9394592047f, 0.9456073046f, 0.9514350295f, 0.9569403529f, 0.9621214271f, 0.9669764638f, 0.9715039134f, 
0.975702107f, 0.9795697927f, 0.9831054807f, 0.9863080978f, 0.9891765118f, 0.9917097688f, 0.9939069748f, 0.9957674146f, 0.9972904325f, 0.9984755516f, 0.9993223548f, 0.9998306036f, -0.0f, -0.01840673015f, -0.03680722415f, -0.05519524589f, -0.07356456667f, -0.09190895408f, -0.1102222055f, -0.1284981072f, -0.1467304677f, -0.1649131179f, -0.1830398887f, -0.201104641f, -0.2191012353f, -0.2370236069f, -0.2548656464f, -0.2726213634f, -0.2902846634f, -0.3078496456f, -0.3253102899f, -0.3426607251f, -0.3598950505f, -0.3770074248f, -0.3939920366f, -0.4108431637f, -0.4275550842f, -0.4441221356f, -0.4605387151f, -0.4767992198f, -0.492898196f, -0.5088301301f, -0.5245896578f, -0.5401714444f, -0.5555702448f, -0.5707807541f, -0.5857978463f, -0.6006164551f, -0.6152315736f, -0.6296382546f, -0.6438315511f, -0.6578066945f, -0.6715589762f, -0.6850836873f, -0.6983762383f, -0.7114322186f, -0.724247098f, -0.7368165851f, -0.7491363883f, -0.761202395f, -0.7730104327f, -0.7845565677f, -0.7958369255f, -0.8068475723f, -0.8175848126f, -0.8280450702f, -0.838224709f, -0.8481203318f, -0.8577286005f, -0.867046237f, -0.8760700822f, -0.8847970963f, -0.893224299f, -0.9013488293f, -0.909168005f, -0.9166790843f, -0.9238795042f, -0.9307669401f, -0.9373390079f, -0.9435934424f, -0.9495281577f, -0.9551411867f, -0.9604305029f, -0.9653944373f, -0.9700312614f, -0.974339366f, -0.97831738f, -0.9819638729f, -0.9852776527f, -0.988257587f, -0.9909026623f, -0.993211925f, -0.9951847196f, -0.996820271f, -0.9981181026f, -0.9990777373f, -0.9996988177f, -0.9999811649f, -0.9999247193f, -0.9995294213f, -0.9987954497f, -0.997723043f, -0.9963126183f, -0.9945645928f, -0.9924795628f, -0.9900581837f, -0.9873014092f, -0.9842100739f, -0.9807852507f, -0.9770281315f, -0.9729399681f, -0.9685220718f, -0.963776052f, -0.9587034583f, -0.9533060193f, -0.9475855827f, -0.9415440559f, -0.9351835251f, -0.9285060763f, -0.9215140343f, -0.9142097831f, -0.9065957069f, -0.8986744881f, -0.8904487491f, -0.8819212914f, -0.8730949759f, -0.8639728427f, 
-0.854557991f, -0.84485358f, -0.8348628879f, -0.8245893121f, -0.8140363097f, -0.8032075167f, -0.7921065688f, -0.7807372212f, -0.7691033483f, -0.7572088242f, -0.7450577617f, -0.7326542735f, -0.720002532f, -0.7071067691f, -0.6939714551f, -0.6806010008f, -0.6669999361f, -0.6531728506f, -0.6391244531f, -0.6248595119f, -0.6103827953f, -0.5956993103f, -0.5808139443f, -0.5657318234f, -0.5504579544f, -0.534997642f, -0.5193560123f, -0.5035383701f, -0.4875501692f, -0.4713967443f, -0.4550835788f, -0.438616246f, -0.4220002592f, -0.4052413106f, -0.3883450329f, -0.3713172078f, -0.3541635275f, -0.336889863f, -0.3195020258f, -0.3020059466f, -0.2844075263f, -0.266712755f, -0.2489276081f, -0.2310581058f, -0.2131103128f, -0.1950903237f, -0.1770042181f, -0.1588581502f, -0.1406582445f, -0.1224106774f, -0.1041216329f, -0.08579730988f, -0.06744392216f, -0.04906767607f, -0.030674804f, -0.01227153838f, 0.006135884672f, 0.02454122901f, 0.0429382585f, 0.061320737f, 0.07968243957f, 0.09801714122f, 0.1163186282f, 0.1345807016f, 0.1527971923f, 0.1709618866f, 0.1890686601f, 0.2071113735f, 0.2250839174f, 0.2429801822f, 0.2607941031f, 0.27851969f, 0.296150893f, 0.3136817515f, 0.3311063051f, 0.3484186828f, 0.3656129837f, 0.3826834261f, 0.3996241987f, 0.4164295495f, 0.433093816f, 0.449611336f, 0.4659765065f, 0.4821837842f, 0.4982276559f, 0.514102757f, 0.5298036337f, 0.5453249812f, 0.5606615543f, 0.5758081675f, 0.5907596946f, 0.6055110693f, 0.6200572252f, 0.6343932748f, 0.64851439f, 0.6624158025f, 0.6760926843f, 0.689540565f, 0.7027547359f, 0.7157308459f, 0.728464365f, 0.7409511209f, 0.7531868219f, 0.7651672363f, 0.7768884897f, 0.7883464098f, 0.7995372415f, 0.81045717f, 0.8211025f, 0.8314695954f, 0.8415549994f, 0.851355195f, 0.8608669639f, 0.8700869679f, 0.8790122271f, 0.8876396418f, 0.8959662318f, 0.903989315f, 0.9117060304f, 0.9191138744f, 0.9262102246f, 0.932992816f, 0.9394592047f, 0.9456073046f, 0.9514350295f, 0.9569403529f, 0.9621214271f, 0.9669764638f, 0.9715039134f, 0.975702107f, 
0.9795697927f, 0.9831054807f, 0.9863080978f, 0.9891765118f, 0.9917097688f, 0.9939069748f, 0.9957674146f, 0.9972904325f, 0.9984755516f, 0.9993223548f, 0.9998306036f}; -constant float ts20[512] = {-0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, 
-0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, 
-0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, 
-0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, 
-0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f}; -constant float ts23[512] = {-0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, 
-0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, 
-0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, 
-0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, -0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f, -0.0f, -0.04906767607f, -0.09801714122f, -0.1467304677f, -0.1950903237f, -0.2429801822f, -0.2902846634f, -0.336889863f, -0.3826834261f, -0.4275550842f, -0.4713967443f, -0.514102757f, -0.5555702448f, -0.5956993103f, -0.6343932748f, -0.6715589762f, -0.7071067691f, -0.7409511209f, -0.7730104327f, -0.8032075167f, -0.8314695954f, -0.8577286005f, -0.8819212914f, -0.903989315f, -0.9238795042f, -0.9415440559f, -0.9569403529f, -0.9700312614f, -0.9807852507f, -0.9891765118f, -0.9951847196f, -0.9987954497f, -1.0f, -0.9987954497f, -0.9951847196f, -0.9891765118f, -0.9807852507f, -0.9700312614f, -0.9569403529f, -0.9415440559f, -0.9238795042f, -0.903989315f, 
-0.8819212914f, -0.8577286005f, -0.8314695954f, -0.8032075167f, -0.7730104327f, -0.7409511209f, -0.7071067691f, -0.6715589762f, -0.6343932748f, -0.5956993103f, -0.5555702448f, -0.514102757f, -0.4713967443f, -0.4275550842f, -0.3826834261f, -0.336889863f, -0.2902846634f, -0.2429801822f, -0.1950903237f, -0.1467304677f, -0.09801714122f, -0.04906767607f}; -constant float ts21[512] = {-0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, 
-0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, 
-0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, 
-0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, 
-0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f}; -constant float ts24[512] = {-0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, 
-0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, 
-0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, 
-0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, -0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f, -0.0f, -0.02454122901f, -0.04906767607f, -0.07356456667f, -0.09801714122f, -0.1224106774f, -0.1467304677f, -0.1709618866f, -0.1950903237f, -0.2191012353f, -0.2429801822f, -0.266712755f, -0.2902846634f, -0.3136817515f, -0.336889863f, -0.3598950505f, -0.3826834261f, -0.4052413106f, -0.4275550842f, -0.449611336f, -0.4713967443f, -0.492898196f, -0.514102757f, -0.534997642f, -0.5555702448f, -0.5758081675f, -0.5956993103f, -0.6152315736f, -0.6343932748f, -0.6531728506f, 
-0.6715589762f, -0.689540565f, -0.7071067691f, -0.724247098f, -0.7409511209f, -0.7572088242f, -0.7730104327f, -0.7883464098f, -0.8032075167f, -0.8175848126f, -0.8314695954f, -0.84485358f, -0.8577286005f, -0.8700869679f, -0.8819212914f, -0.893224299f, -0.903989315f, -0.9142097831f, -0.9238795042f, -0.932992816f, -0.9415440559f, -0.9495281577f, -0.9569403529f, -0.963776052f, -0.9700312614f, -0.975702107f, -0.9807852507f, -0.9852776527f, -0.9891765118f, -0.9924795628f, -0.9951847196f, -0.9972904325f, -0.9987954497f, -0.9996988177f}; -constant float ts22[512] = {-0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, 
-0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, 
-0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, 
-0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, 
-0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f}; -constant float ts25[512] = {-0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, 
-0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, 
-0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, 
-0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, -0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f, -0.0f, -0.07356456667f, -0.1467304677f, -0.2191012353f, -0.2902846634f, -0.3598950505f, -0.4275550842f, -0.492898196f, -0.5555702448f, -0.6152315736f, -0.6715589762f, -0.724247098f, -0.7730104327f, -0.8175848126f, -0.8577286005f, -0.893224299f, -0.9238795042f, -0.9495281577f, -0.9700312614f, -0.9852776527f, -0.9951847196f, -0.9996988177f, -0.9987954497f, -0.9924795628f, -0.9807852507f, -0.963776052f, -0.9415440559f, -0.9142097831f, -0.8819212914f, -0.84485358f, -0.8032075167f, -0.7572088242f, -0.7071067691f, -0.6531728506f, -0.5956993103f, -0.534997642f, -0.4713967443f, -0.4052413106f, -0.336889863f, -0.266712755f, -0.1950903237f, 
-0.1224106774f, -0.04906767607f, 0.02454122901f, 0.09801714122f, 0.1709618866f, 0.2429801822f, 0.3136817515f, 0.3826834261f, 0.449611336f, 0.514102757f, 0.5758081675f, 0.6343932748f, 0.689540565f, 0.7409511209f, 0.7883464098f, 0.8314695954f, 0.8700869679f, 0.903989315f, 0.932992816f, 0.9569403529f, 0.975702107f, 0.9891765118f, 0.9972904325f}; -constant float ts30[512] = {-0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, 
-0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, 
-0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, 
-0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, 
-0.1950903237f}; -constant float ts33[512] = {-0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, 
-0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, 
-0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, 
-0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f, -0.0f, -0.1950903237f, -0.3826834261f, -0.5555702448f, -0.7071067691f, -0.8314695954f, -0.9238795042f, -0.9807852507f, -1.0f, -0.9807852507f, -0.9238795042f, -0.8314695954f, -0.7071067691f, -0.5555702448f, -0.3826834261f, -0.1950903237f}; -constant float ts31[512] = {-0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, 
-0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, 
-0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, 
-0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, 
-0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f}; -constant float ts34[512] = {-0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, 
-0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, 
-0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, 
-0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, 
-0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f, -0.0f, -0.09801714122f, -0.1950903237f, -0.2902846634f, -0.3826834261f, -0.4713967443f, -0.5555702448f, -0.6343932748f, -0.7071067691f, -0.7730104327f, -0.8314695954f, -0.8819212914f, -0.9238795042f, -0.9569403529f, -0.9807852507f, -0.9951847196f}; -constant float ts32[512] = {-0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, 
-0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, 
-0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 
0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, 
-0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f}; -constant float ts35[512] = {-0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, 
-0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, 
-0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, 
-0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 
0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f, -0.0f, -0.2902846634f, -0.5555702448f, -0.7730104327f, -0.9238795042f, -0.9951847196f, -0.9807852507f, -0.8819212914f, -0.7071067691f, -0.4713967443f, -0.1950903237f, 0.09801714122f, 0.3826834261f, 0.6343932748f, 0.8314695954f, 0.9569403529f}; -constant float ts40[512] = {-0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, 
-0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, 
-0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, 
-0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f}; -constant float ts43[512] = {-0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, 
-0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, 
-0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, 
-0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f, -0.0f, -0.7071067691f, -1.0f, -0.7071067691f}; -constant float ts41[512] = {-0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, 
-0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, 
-0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, 
-0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, 
-0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f}; -constant float ts44[512] = {-0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, 
-0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, 
-0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, 
-0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f, -0.0f, -0.3826834261f, -0.7071067691f, -0.9238795042f}; -constant float ts42[512] = {-0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, 
-0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, 
-0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, 
-0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, 
-0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f}; -constant float ts45[512] = {-0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, 
-0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, 
-0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, 
-0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f, -0.0f, -0.9238795042f, -0.7071067691f, 0.3826834261f}; - diff --git a/kernels/fft3d/fft3d_bram.cl b/kernels/fft3d/fft3d_bram.cl index 59803c8..d796000 100755 --- a/kernels/fft3d/fft3d_bram.cl +++ b/kernels/fft3d/fft3d_bram.cl @@ -1,7 +1,7 @@ // Author: Arjun Ramaswami #include "fft_config.h" -#include "fft_8.cl" +#include "../common/fft_8.cl" #include "../matrixTranspose/diagonal_bitrev.cl" #pragma OPENCL EXTENSION cl_intel_channels : enable diff --git a/kernels/fft3d/fft3d_ddr.cl b/kernels/fft3d/fft3d_ddr.cl index 9e7fd89..f9b5faa 100755 --- a/kernels/fft3d/fft3d_ddr.cl +++ b/kernels/fft3d/fft3d_ddr.cl @@ -1,7 +1,7 @@ // Author: Arjun Ramaswami #include "fft_config.h" -#include "fft_8.cl" +#include "../common/fft_8.cl" #include "../matrixTranspose/diagonal_bitrev.cl" 
#pragma OPENCL EXTENSION cl_intel_channels : enable @@ -318,7 +318,7 @@ kernel void transpose3D( unsigned index_wr = (batch_index * N * N * N) + (zdim * N * N) + (ydim * N) + xdim; //float2x8 data, data_out; - if (step < (N * DEPTH)) { + if (step < ((N * DEPTH) - initial_delay)) { data_wr.i0 = src[index_wr + 0]; data_wr.i1 = src[index_wr + 1]; data_wr.i2 = src[index_wr + 2]; diff --git a/kernels/fft3d/fft3d_ddr_batch.cl b/kernels/fft3d/fft3d_ddr_batch.cl index 9e7fd89..77d6945 100755 --- a/kernels/fft3d/fft3d_ddr_batch.cl +++ b/kernels/fft3d/fft3d_ddr_batch.cl @@ -1,7 +1,7 @@ // Author: Arjun Ramaswami #include "fft_config.h" -#include "fft_8.cl" +#include "../common/fft_8.cl" #include "../matrixTranspose/diagonal_bitrev.cl" #pragma OPENCL EXTENSION cl_intel_channels : enable diff --git a/kernels/fft3d/fft3d_ddr_svm.cl b/kernels/fft3d/fft3d_ddr_svm.cl index 2059dce..4f5c5b6 100755 --- a/kernels/fft3d/fft3d_ddr_svm.cl +++ b/kernels/fft3d/fft3d_ddr_svm.cl @@ -1,7 +1,7 @@ // Author: Arjun Ramaswami #include "fft_config.h" -#include "fft_8.cl" +#include "../common/fft_8.cl" #include "../matrixTranspose/diagonal_bitrev.cl" #pragma OPENCL EXTENSION cl_intel_channels : enable diff --git a/kernels/fft3d/fft_8.cl b/kernels/fft3d/fft_8.cl deleted file mode 100755 index c30af91..0000000 --- a/kernels/fft3d/fft_8.cl +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright (C) 2013-2018 Altera Corporation, San Jose, California, USA. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to -// whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// -// This agreement shall be governed in all respects by the laws of the State of California and -// by the laws of the United States of America. -// -// Complex single-precision floating-point radix-4 feedforward FFT / iFFT engine -// -// See Mario Garrido, Jesús Grajal, M. A. Sanchez, Oscar Gustafsson: -// Pipeline Radix-2k Feedforward FFT Architectures. -// IEEE Trans. VLSI Syst. 21(1): 23-32 (2013)) -// -// The log(size) of the transform must be a compile-time constant argument. -// This FFT engine processes 8 points for each invocation. The inputs are eight -// ordered streams while the outputs are in bit reversed order. -// -// The entry point of the engine is the 'fft_step' function. This function -// passes 8 data points through a fixed sequence of processing blocks -// (butterfly, rotation, swap, reorder, multiplications, etc.) 
and produces -// 8 output points towards the overall FFT transform. -// -// The engine is designed to be invoked from a loop in a single work-item task. -// When compiling a single work-item task, the compiler leverages pipeline -// parallelism and overlaps the execution of multiple invocations of this -// function. A new instance can start processing every clock cycle - - -// Includes tabled twiddle factors - storing constants uses fewer resources -// than instantiating 'cos' or 'sin' hardware -#include "twid_radix4_8.cl" - -// Convenience struct representing the 8 data points processed each step -// Each member is a float2 representing a complex number - -typedef struct { - float2 i0; - float2 i1; - float2 i2; - float2 i3; - float2 i4; - float2 i5; - float2 i6; - float2 i7; -} float2x8; - -// FFT butterfly building block -float2x8 butterfly(float2x8 data) { - float2x8 res; - res.i0 = data.i0 + data.i1; - res.i1 = data.i0 - data.i1; - res.i2 = data.i2 + data.i3; - res.i3 = data.i2 - data.i3; - res.i4 = data.i4 + data.i5; - res.i5 = data.i4 - data.i5; - res.i6 = data.i6 + data.i7; - res.i7 = data.i6 - data.i7; - return res; -} - -// Swap real and imaginary components in preparation for inverse transform -float2x8 swap_complex(float2x8 data) { - float2x8 res; - res.i0.x = data.i0.y; - res.i0.y = data.i0.x; - res.i1.x = data.i1.y; - res.i1.y = data.i1.x; - res.i2.x = data.i2.y; - res.i2.y = data.i2.x; - res.i3.x = data.i3.y; - res.i3.y = data.i3.x; - res.i4.x = data.i4.y; - res.i4.y = data.i4.x; - res.i5.x = data.i5.y; - res.i5.y = data.i5.x; - res.i6.x = data.i6.y; - res.i6.y = data.i6.x; - res.i7.x = data.i7.y; - res.i7.y = data.i7.x; - return res; -} - -// FFT trivial rotation building block -float2x8 trivial_rotate(float2x8 data) { - float2 tmp = data.i3; - data.i3.x = tmp.y; - data.i3.y = -tmp.x; - tmp = data.i7; - data.i7.x = tmp.y; - data.i7.y = -tmp.x; - return data; -} - -// FFT data swap building block associated with trivial rotations -float2x8 
trivial_swap(float2x8 data) { - float2 tmp = data.i1; - data.i1 = data.i2; - data.i2 = tmp; - tmp = data.i5; - data.i5 = data.i6; - data.i6 = tmp; - return data; -} - -// FFT data swap building block associated with complex rotations -float2x8 swap(float2x8 data) { - float2 tmp = data.i1; - data.i1 = data.i4; - float2 tmp2 = data.i2; - data.i2 = tmp; - tmp = data.i3; - data.i3 = data.i5; - data.i4 = tmp2; - data.i5 = data.i6; - data.i6 = tmp; - return data; -} - -// This function "delays" the input by 'depth' steps -// Input 'data' from invocation N would be returned in invocation N + depth -// The 'shift_reg' sliding window is shifted by 1 element at every invocation -float2 delay(float2 data, const int depth, float2 *shift_reg) { - shift_reg[depth] = data; - return shift_reg[0]; -} - -// FFT data reordering building block. Implements the reordering depicted below -// (for depth = 2). The first valid outputs are in invocation 4 -// Invocation count: 0123... 01234567... -// data.i0 : GECA... ----> DBCA... -// data.i1 : HFDB... ----> HFGE... - -float2x8 reorder_data(float2x8 data, const int depth, float2 * shift_reg, bool toggle) { - // Use disconnected segments of length 'depth + 1' elements starting at - // 'shift_reg' to implement the delay elements. 
At the end of each FFT step, - // the contents of the entire buffer is shifted by 1 element - data.i1 = delay(data.i1, depth, shift_reg); - data.i3 = delay(data.i3, depth, shift_reg + depth + 1); - data.i5 = delay(data.i5, depth, shift_reg + 2 * (depth + 1)); - data.i7 = delay(data.i7, depth, shift_reg + 3 * (depth + 1)); - - if (toggle) { - float2 tmp = data.i0; - data.i0 = data.i1; - data.i1 = tmp; - tmp = data.i2; - data.i2 = data.i3; - data.i3 = tmp; - tmp = data.i4; - data.i4 = data.i5; - data.i5 = tmp; - tmp = data.i6; - data.i6 = data.i7; - data.i7 = tmp; - } - - data.i0 = delay(data.i0, depth, shift_reg + 4 * (depth + 1)); - data.i2 = delay(data.i2, depth, shift_reg + 5 * (depth + 1)); - data.i4 = delay(data.i4, depth, shift_reg + 6 * (depth + 1)); - data.i6 = delay(data.i6, depth, shift_reg + 7 * (depth + 1)); - - return data; -} - -// Implements a complex number multiplication -float2 comp_mult(float2 a, float2 b) { - float2 res; - res.x = a.x * b.x - a.y * b.y; - res.y = a.x * b.y + a.y * b.x; - return res; -} - -// Produces the twiddle factor associated with a processing stream 'stream', -// at a specified 'stage' during a step 'index' of the computation -// -// If there are precomputed twiddle factors for the given FFT size, uses them -// This saves hardware resources, because it avoids evaluating 'cos' and 'sin' -// functions - -float2 twiddle(int index, int stage, int size, int stream) { - float2 twid; - int twid_stage = stage >> 1; - - // Coalesces the twiddle tables for indexed access - constant float * twiddles_cos[TWID_STAGES][6] = { - {tc00, tc01, tc02, tc03, tc04, tc05}, - {tc10, tc11, tc12, tc13, tc14, tc15}, - {tc20, tc21, tc22, tc23, tc24, tc25}, - {tc30, tc31, tc32, tc33, tc34, tc35}, - {tc40, tc41, tc42, tc43, tc44, tc45} - }; - constant float * twiddles_sin[TWID_STAGES][6] = { - {ts00, ts01, ts02, ts03, ts04, ts05}, - {ts10, ts11, ts12, ts13, ts14, ts15}, - {ts20, ts21, ts22, ts23, ts24, ts25}, - {ts30, ts31, ts32, ts33, ts34, ts35}, - 
{ts40, ts41, ts42, ts43, ts44, ts45} - }; - - // Use the precomputed twiddle factors, if available for single precision floats - otherwise, compute them - if (size <= (1 << (TWID_STAGES * 2 + 2))) { - twid.x = twiddles_cos[twid_stage][stream] - [index * ((1 << (TWID_STAGES * 2 + 2)) / size)]; - twid.y = twiddles_sin[twid_stage][stream] - [index * ((1 << (TWID_STAGES * 2 + 2)) / size)]; - } else { - // This would generate hardware consuming a large number of resources - // Instantiated only if precomputed twiddle factors are available - const float TWOPI = 2.0f * M_PI_F; - - int multiplier; - - // The latter 3 streams will generate the second half of the elements - // In that case phase = 1 - int phase = 0; - if (stream >= 3) { - stream -= 3; - phase = 1; - } - switch (stream) { - case 0: multiplier = 2; break; - case 1: multiplier = 1; break; - case 2: multiplier = 3; break; - default: multiplier = 0; - } - int pos = (1 << (stage - 1)) * multiplier * ((index + (size / 8) * phase) - & (size / 4 / (1 << (stage - 1)) - 1)); - float theta = -1.0f * TWOPI / size * (pos & (size - 1)); - twid.x = cos(theta); - twid.y = sin(theta); - } - - return twid; -} - -// FFT complex rotation building block -float2x8 complex_rotate(float2x8 data, int index, int stage, int size) { - data.i1 = comp_mult(data.i1, twiddle(index, stage, size, 0)); - data.i2 = comp_mult(data.i2, twiddle(index, stage, size, 1)); - data.i3 = comp_mult(data.i3, twiddle(index, stage, size, 2)); - data.i5 = comp_mult(data.i5, twiddle(index, stage, size, 3)); - data.i6 = comp_mult(data.i6, twiddle(index, stage, size, 4)); - data.i7 = comp_mult(data.i7, twiddle(index, stage, size, 5)); - return data; -} - - -// Process 8 input points towards and a FFT/iFFT of size N, N >= 8 -// (in order input, bit reversed output). Apply all input points in N / 8 -// consecutive invocations. Obtain all outputs in N /8 consecutive invocations -// starting with invocation N /8 - 1 (outputs are delayed). 
Multiple back-to-back -// transforms can be executed -// -// 'data' encapsulates 8 complex single-precision floating-point input points -// 'step' specifies the index of the current invocation -// 'fft_delay_elements' is an array representing a sliding window of size N+8*(log(N)-2) -// 'inverse' toggles between the direct and inverse transform -// 'logN' should be a COMPILE TIME constant evaluating log(N) - the constant is -// propagated throughout the code to achieve efficient hardware -// -float2x8 fft_step(float2x8 data, int step, float2 *fft_delay_elements, - bool inverse, const int logN) { - const int size = 1 << logN; - - // Swap real and imaginary components if doing an inverse transform - if (inverse) { - data = swap_complex(data); - } - - // Stage 0 of feed-forward FFT - data = butterfly(data); - data = trivial_rotate(data); - data = trivial_swap(data); - - // Stage 1 - data = butterfly(data); - data = complex_rotate(data, step & (size / 8 - 1), 1, size); - data = swap(data); - - // Next logN - 2 stages alternate two computation patterns - represented as - // a loop to avoid code duplication. Instruct the compiler to fully unroll - // the loop to increase the amount of pipeline parallelism and allow feed - // forward execution - - #pragma unroll - for (int stage = 2; stage < logN - 1; stage++) { - bool complex_stage = stage & 1; // stages 3, 5, ... 
- - // Figure out the index of the element processed at this stage - // Subtract (add modulo size / 8) the delay incurred as data travels - // from one stage to the next - int data_index = (step + ( 1 << (logN - 1 - stage))) & (size / 8 - 1); - - data = butterfly(data); - - if (complex_stage) { - data = complex_rotate(data, data_index, stage, size); - } - - data = swap(data); - - // Compute the delay of this stage - int delay = 1 << (logN - 2 - stage); - - // Reordering multiplexers must toggle every 'delay' steps - bool toggle = data_index & delay; - - // Assign unique sections of the buffer for the set of delay elements at - // each stage - float2 *head_buffer = fft_delay_elements + - size - (1 << (logN - stage + 2)) + 8 * (stage - 2); - - data = reorder_data(data, delay, head_buffer, toggle); - - if (!complex_stage) { - data = trivial_rotate(data); - } - } - - // Stage logN - 1 - data = butterfly(data); - - // Shift the contents of the sliding window. The hardware is capable of - // shifting the entire contents in parallel if the loop is unrolled. More - // important, when unrolling this loop each transfer maps to a trivial - // loop-carried dependency - #pragma unroll - for (int ii = 0; ii < size + 8 * (logN - 2) - 1; ii++) { - fft_delay_elements[ii] = fft_delay_elements[ii + 1]; - } - - if (inverse) { - data = swap_complex(data); - } - - return data; -} \ No newline at end of file diff --git a/kernels/fft3d/twid_radix4_8.cl b/kernels/fft3d/twid_radix4_8.cl deleted file mode 100755 index ec34476..0000000 --- a/kernels/fft3d/twid_radix4_8.cl +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (C) 2013-2018 Altera Corporation, San Jose, California, USA. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to -// whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// -// This agreement shall be governed in all respects by the laws of the State of California and -// by the laws of the United States of America. 
- -// Twiddle factors for radix-4 FFTs -// Precomputed for FFT sizes between 8 and 4096 points - -#define TWID_STAGES 5 - -constant float tc00[512] = {1, 0.9999952912, 0.9999811649, 0.9999576211, 0.9999247193, 0.9998823404, 0.9998306036, 0.9997693896, 0.9996988177, 0.9996188283, 0.9995294213, 0.9994305968, 0.9993223548, 0.9992047548, 0.9990777373, 0.9989413023, 0.9987954497, 0.9986402392, 0.9984755516, 0.9983015656, 0.9981181026, 0.9979252815, 0.997723043, 0.9975114465, 0.9972904325, 0.9970600605, 0.996820271, 0.9965711236, 0.9963126183, 0.9960446954, 0.9957674146, 0.9954807758, 0.9951847196, 0.9948793054, 0.9945645928, 0.9942404628, 0.9939069748, 0.9935641289, 0.993211925, 0.9928504229, 0.9924795628, 0.9920992851, 0.9917097688, 0.9913108349, 0.9909026623, 0.9904850721, 0.9900581837, 0.9896219969, 0.9891765118, 0.9887216687, 0.988257587, 0.9877841473, 0.9873014092, 0.9868093729, 0.9863080978, 0.9857975245, 0.9852776527, 0.9847484827, 0.9842100739, 0.9836624265, 0.9831054807, 0.9825392962, 0.9819638729, 0.9813792109, 0.9807852507, 0.9801821113, 0.9795697927, 0.9789481759, 0.97831738, 0.9776773453, 0.9770281315, 0.9763697386, 0.975702107, 0.9750253558, 0.974339366, 0.9736442566, 0.9729399681, 0.9722265005, 0.9715039134, 0.9707721472, 0.9700312614, 0.9692812562, 0.9685220718, 0.9677538276, 0.9669764638, 0.9661899805, 0.9653944373, 0.9645897746, 0.963776052, 0.9629532695, 0.9621214271, 0.9612804651, 0.9604305029, 0.9595715404, 0.9587034583, 0.9578264356, 0.9569403529, 0.95604527, 0.9551411867, 0.9542281032, 0.9533060193, 0.9523749948, 0.9514350295, 0.950486064, 0.9495281577, 0.9485613704, 0.9475855827, 0.946600914, 0.9456073046, 0.9446048141, 0.9435934424, 0.9425731897, 0.9415440559, 0.940506041, 0.9394592047, 0.9384035468, 0.9373390079, 0.9362656474, 0.9351835251, 0.9340925217, 0.932992816, 0.9318842888, 0.9307669401, 0.9296408892, 0.9285060763, 0.9273625016, 0.9262102246, 0.9250492454, 0.9238795042, 0.9227011204, 0.9215140343, 0.9203183055, 0.9191138744, 
0.9179008007, 0.9166790843, 0.9154487252, 0.9142097831, 0.9129621983, 0.9117060304, 0.9104412794, 0.909168005, 0.9078860879, 0.9065957069, 0.9052967429, 0.903989315, 0.9026733041, 0.9013488293, 0.9000158906, 0.8986744881, 0.8973245621, 0.8959662318, 0.8945994973, 0.893224299, 0.8918406963, 0.8904487491, 0.8890483379, 0.8876396418, 0.8862225413, 0.8847970963, 0.8833633661, 0.8819212914, 0.8804708719, 0.8790122271, 0.8775452971, 0.8760700822, 0.8745866418, 0.8730949759, 0.8715950847, 0.8700869679, 0.8685706854, 0.867046237, 0.8655136228, 0.8639728427, 0.8624239564, 0.8608669639, 0.8593018055, 0.8577286005, 0.8561473489, 0.854557991, 0.8529605865, 0.851355195, 0.8497417569, 0.8481203318, 0.8464909196, 0.84485358, 0.8432082534, 0.8415549994, 0.8398938179, 0.838224709, 0.8365477324, 0.8348628879, 0.8331701756, 0.8314695954, 0.8297612071, 0.8280450702, 0.8263210654, 0.8245893121, 0.8228498101, 0.8211025, 0.8193475008, 0.8175848126, 0.8158144355, 0.8140363097, 0.8122506142, 0.81045717, 0.8086561561, 0.8068475723, 0.8050313592, 0.8032075167, 0.801376164, 0.7995372415, 0.7976908684, 0.7958369255, 0.7939754725, 0.7921065688, 0.7902302146, 0.7883464098, 0.786455214, 0.7845565677, 0.7826505899, 0.7807372212, 0.7788165212, 0.7768884897, 0.7749531269, 0.7730104327, 0.7710605264, 0.7691033483, 0.7671388984, 0.7651672363, 0.7631884217, 0.761202395, 0.7592092156, 0.7572088242, 0.7552013993, 0.7531868219, 0.7511651516, 0.7491363883, 0.7471005917, 0.7450577617, 0.7430079579, 0.7409511209, 0.73888731, 0.7368165851, 0.7347388864, 0.7326542735, 0.7305627465, 0.728464365, 0.726359129, 0.724247098, 0.7221282125, 0.720002532, 0.7178700566, 0.7157308459, 0.7135848403, 0.7114322186, 0.7092728019, 0.7071067691, 0.7049340606, 0.7027547359, 0.7005687952, 0.6983762383, 0.696177125, 0.6939714551, 0.6917592287, 0.689540565, 0.6873153448, 0.6850836873, 0.6828455329, 0.6806010008, 0.6783500314, 0.6760926843, 0.6738290191, 0.6715589762, 0.6692826152, 0.6669999361, 0.6647109985, 0.6624158025, 
0.6601143479, 0.6578066945, 0.6554928422, 0.6531728506, 0.6508466601, 0.64851439, 0.6461760402, 0.6438315511, 0.6414810419, 0.6391244531, 0.6367618442, 0.6343932748, 0.6320187449, 0.6296382546, 0.6272518039, 0.6248595119, 0.6224612594, 0.6200572252, 0.6176472902, 0.6152315736, 0.6128100753, 0.6103827953, 0.6079497933, 0.6055110693, 0.6030666232, 0.6006164551, 0.5981606841, 0.5956993103, 0.5932322741, 0.5907596946, 0.5882815719, 0.5857978463, 0.5833086371, 0.5808139443, 0.5783137679, 0.5758081675, 0.573297143, 0.5707807541, 0.5682589412, 0.5657318234, 0.5631993413, 0.5606615543, 0.5581185222, 0.5555702448, 0.5530167222, 0.5504579544, 0.5478940606, 0.5453249812, 0.5427507758, 0.5401714444, 0.5375870466, 0.534997642, 0.5324031115, 0.5298036337, 0.5271991491, 0.5245896578, 0.5219752789, 0.5193560123, 0.5167317986, 0.514102757, 0.5114688277, 0.5088301301, 0.5061866641, 0.5035383701, 0.5008853674, 0.4982276559, 0.4955652654, 0.492898196, 0.4902264774, 0.4875501692, 0.4848692417, 0.4821837842, 0.479493767, 0.4767992198, 0.4741002023, 0.4713967443, 0.4686888158, 0.4659765065, 0.4632597864, 0.4605387151, 0.4578132927, 0.4550835788, 0.4523495734, 0.449611336, 0.4468688369, 0.4441221356, 0.4413712621, 0.438616246, 0.4358570874, 0.433093816, 0.4303264916, 0.4275550842, 0.4247796834, 0.4220002592, 0.4192169011, 0.4164295495, 0.4136383235, 0.4108431637, 0.4080441594, 0.4052413106, 0.4024346471, 0.3996241987, 0.3968099952, 0.3939920366, 0.3911703825, 0.3883450329, 0.3855160475, 0.3826834261, 0.3798471987, 0.3770074248, 0.3741640747, 0.3713172078, 0.3684668243, 0.3656129837, 0.3627557158, 0.3598950505, 0.3570309579, 0.3541635275, 0.3512927592, 0.3484186828, 0.3455413282, 0.3426607251, 0.3397768736, 0.336889863, 0.3339996636, 0.3311063051, 0.3282098472, 0.3253102899, 0.3224076927, 0.3195020258, 0.3165933788, 0.3136817515, 0.310767144, 0.3078496456, 0.3049292266, 0.3020059466, 0.2990798354, 0.296150893, 0.2932191491, 0.2902846634, 0.2873474658, 0.2844075263, 0.2814649343, 
0.27851969, 0.2755718231, 0.2726213634, 0.2696683109, 0.266712755, 0.2637546659, 0.2607941031, 0.2578310966, 0.2548656464, 0.2518978119, 0.2489276081, 0.24595505, 0.2429801822, 0.2400030196, 0.2370236069, 0.234041959, 0.2310581058, 0.228072077, 0.2250839174, 0.2220936269, 0.2191012353, 0.2161068022, 0.2131103128, 0.2101118416, 0.2071113735, 0.2041089684, 0.201104641, 0.1980984062, 0.1950903237, 0.1920803934, 0.1890686601, 0.1860551536, 0.1830398887, 0.1800228953, 0.1770042181, 0.1739838719, 0.1709618866, 0.167938292, 0.1649131179, 0.161886394, 0.1588581502, 0.1558284014, 0.1527971923, 0.1497645378, 0.1467304677, 0.1436950266, 0.1406582445, 0.1376201212, 0.1345807016, 0.1315400302, 0.1284981072, 0.1254549772, 0.1224106774, 0.1193652153, 0.1163186282, 0.1132709533, 0.1102222055, 0.1071724221, 0.1041216329, 0.1010698602, 0.09801714122, 0.09496349841, 0.09190895408, 0.08885355294, 0.08579730988, 0.08274026215, 0.07968243957, 0.07662386447, 0.07356456667, 0.07050457597, 0.06744392216, 0.06438262761, 0.061320737, 0.05825826526, 0.05519524589, 0.05213170499, 0.04906767607, 0.04600318149, 0.0429382585, 0.03987292573, 0.03680722415, 0.0337411724, 0.030674804, 0.02760814503, 0.02454122901, 0.02147408016, 0.01840673015, 0.01533920597, 0.01227153838, 0.009203754365, 0.006135884672, 0.003067956772}; -constant float tc03[512] = {6.123234263e-17, -0.003067956772, -0.006135884672, -0.009203754365, -0.01227153838, -0.01533920597, -0.01840673015, -0.02147408016, -0.02454122901, -0.02760814503, -0.030674804, -0.0337411724, -0.03680722415, -0.03987292573, -0.0429382585, -0.04600318149, -0.04906767607, -0.05213170499, -0.05519524589, -0.05825826526, -0.061320737, -0.06438262761, -0.06744392216, -0.07050457597, -0.07356456667, -0.07662386447, -0.07968243957, -0.08274026215, -0.08579730988, -0.08885355294, -0.09190895408, -0.09496349841, -0.09801714122, -0.1010698602, -0.1041216329, -0.1071724221, -0.1102222055, -0.1132709533, -0.1163186282, -0.1193652153, -0.1224106774, -0.1254549772, 
-0.1284981072, -0.1315400302, -0.1345807016, -0.1376201212, -0.1406582445, -0.1436950266, -0.1467304677, -0.1497645378, -0.1527971923, -0.1558284014, -0.1588581502, -0.161886394, -0.1649131179, -0.167938292, -0.1709618866, -0.1739838719, -0.1770042181, -0.1800228953, -0.1830398887, -0.1860551536, -0.1890686601, -0.1920803934, -0.1950903237, -0.1980984062, -0.201104641, -0.2041089684, -0.2071113735, -0.2101118416, -0.2131103128, -0.2161068022, -0.2191012353, -0.2220936269, -0.2250839174, -0.228072077, -0.2310581058, -0.234041959, -0.2370236069, -0.2400030196, -0.2429801822, -0.24595505, -0.2489276081, -0.2518978119, -0.2548656464, -0.2578310966, -0.2607941031, -0.2637546659, -0.266712755, -0.2696683109, -0.2726213634, -0.2755718231, -0.27851969, -0.2814649343, -0.2844075263, -0.2873474658, -0.2902846634, -0.2932191491, -0.296150893, -0.2990798354, -0.3020059466, -0.3049292266, -0.3078496456, -0.310767144, -0.3136817515, -0.3165933788, -0.3195020258, -0.3224076927, -0.3253102899, -0.3282098472, -0.3311063051, -0.3339996636, -0.336889863, -0.3397768736, -0.3426607251, -0.3455413282, -0.3484186828, -0.3512927592, -0.3541635275, -0.3570309579, -0.3598950505, -0.3627557158, -0.3656129837, -0.3684668243, -0.3713172078, -0.3741640747, -0.3770074248, -0.3798471987, -0.3826834261, -0.3855160475, -0.3883450329, -0.3911703825, -0.3939920366, -0.3968099952, -0.3996241987, -0.4024346471, -0.4052413106, -0.4080441594, -0.4108431637, -0.4136383235, -0.4164295495, -0.4192169011, -0.4220002592, -0.4247796834, -0.4275550842, -0.4303264916, -0.433093816, -0.4358570874, -0.438616246, -0.4413712621, -0.4441221356, -0.4468688369, -0.449611336, -0.4523495734, -0.4550835788, -0.4578132927, -0.4605387151, -0.4632597864, -0.4659765065, -0.4686888158, -0.4713967443, -0.4741002023, -0.4767992198, -0.479493767, -0.4821837842, -0.4848692417, -0.4875501692, -0.4902264774, -0.492898196, -0.4955652654, -0.4982276559, -0.5008853674, -0.5035383701, -0.5061866641, -0.5088301301, -0.5114688277, 
-0.514102757, -0.5167317986, -0.5193560123, -0.5219752789, -0.5245896578, -0.5271991491, -0.5298036337, -0.5324031115, -0.534997642, -0.5375870466, -0.5401714444, -0.5427507758, -0.5453249812, -0.5478940606, -0.5504579544, -0.5530167222, -0.5555702448, -0.5581185222, -0.5606615543, -0.5631993413, -0.5657318234, -0.5682589412, -0.5707807541, -0.573297143, -0.5758081675, -0.5783137679, -0.5808139443, -0.5833086371, -0.5857978463, -0.5882815719, -0.5907596946, -0.5932322741, -0.5956993103, -0.5981606841, -0.6006164551, -0.6030666232, -0.6055110693, -0.6079497933, -0.6103827953, -0.6128100753, -0.6152315736, -0.6176472902, -0.6200572252, -0.6224612594, -0.6248595119, -0.6272518039, -0.6296382546, -0.6320187449, -0.6343932748, -0.6367618442, -0.6391244531, -0.6414810419, -0.6438315511, -0.6461760402, -0.64851439, -0.6508466601, -0.6531728506, -0.6554928422, -0.6578066945, -0.6601143479, -0.6624158025, -0.6647109985, -0.6669999361, -0.6692826152, -0.6715589762, -0.6738290191, -0.6760926843, -0.6783500314, -0.6806010008, -0.6828455329, -0.6850836873, -0.6873153448, -0.689540565, -0.6917592287, -0.6939714551, -0.696177125, -0.6983762383, -0.7005687952, -0.7027547359, -0.7049340606, -0.7071067691, -0.7092728019, -0.7114322186, -0.7135848403, -0.7157308459, -0.7178700566, -0.720002532, -0.7221282125, -0.724247098, -0.726359129, -0.728464365, -0.7305627465, -0.7326542735, -0.7347388864, -0.7368165851, -0.73888731, -0.7409511209, -0.7430079579, -0.7450577617, -0.7471005917, -0.7491363883, -0.7511651516, -0.7531868219, -0.7552013993, -0.7572088242, -0.7592092156, -0.761202395, -0.7631884217, -0.7651672363, -0.7671388984, -0.7691033483, -0.7710605264, -0.7730104327, -0.7749531269, -0.7768884897, -0.7788165212, -0.7807372212, -0.7826505899, -0.7845565677, -0.786455214, -0.7883464098, -0.7902302146, -0.7921065688, -0.7939754725, -0.7958369255, -0.7976908684, -0.7995372415, -0.801376164, -0.8032075167, -0.8050313592, -0.8068475723, -0.8086561561, -0.81045717, -0.8122506142, 
-0.8140363097, -0.8158144355, -0.8175848126, -0.8193475008, -0.8211025, -0.8228498101, -0.8245893121, -0.8263210654, -0.8280450702, -0.8297612071, -0.8314695954, -0.8331701756, -0.8348628879, -0.8365477324, -0.838224709, -0.8398938179, -0.8415549994, -0.8432082534, -0.84485358, -0.8464909196, -0.8481203318, -0.8497417569, -0.851355195, -0.8529605865, -0.854557991, -0.8561473489, -0.8577286005, -0.8593018055, -0.8608669639, -0.8624239564, -0.8639728427, -0.8655136228, -0.867046237, -0.8685706854, -0.8700869679, -0.8715950847, -0.8730949759, -0.8745866418, -0.8760700822, -0.8775452971, -0.8790122271, -0.8804708719, -0.8819212914, -0.8833633661, -0.8847970963, -0.8862225413, -0.8876396418, -0.8890483379, -0.8904487491, -0.8918406963, -0.893224299, -0.8945994973, -0.8959662318, -0.8973245621, -0.8986744881, -0.9000158906, -0.9013488293, -0.9026733041, -0.903989315, -0.9052967429, -0.9065957069, -0.9078860879, -0.909168005, -0.9104412794, -0.9117060304, -0.9129621983, -0.9142097831, -0.9154487252, -0.9166790843, -0.9179008007, -0.9191138744, -0.9203183055, -0.9215140343, -0.9227011204, -0.9238795042, -0.9250492454, -0.9262102246, -0.9273625016, -0.9285060763, -0.9296408892, -0.9307669401, -0.9318842888, -0.932992816, -0.9340925217, -0.9351835251, -0.9362656474, -0.9373390079, -0.9384035468, -0.9394592047, -0.940506041, -0.9415440559, -0.9425731897, -0.9435934424, -0.9446048141, -0.9456073046, -0.946600914, -0.9475855827, -0.9485613704, -0.9495281577, -0.950486064, -0.9514350295, -0.9523749948, -0.9533060193, -0.9542281032, -0.9551411867, -0.95604527, -0.9569403529, -0.9578264356, -0.9587034583, -0.9595715404, -0.9604305029, -0.9612804651, -0.9621214271, -0.9629532695, -0.963776052, -0.9645897746, -0.9653944373, -0.9661899805, -0.9669764638, -0.9677538276, -0.9685220718, -0.9692812562, -0.9700312614, -0.9707721472, -0.9715039134, -0.9722265005, -0.9729399681, -0.9736442566, -0.974339366, -0.9750253558, -0.975702107, -0.9763697386, -0.9770281315, -0.9776773453, 
-0.97831738, -0.9789481759, -0.9795697927, -0.9801821113, -0.9807852507, -0.9813792109, -0.9819638729, -0.9825392962, -0.9831054807, -0.9836624265, -0.9842100739, -0.9847484827, -0.9852776527, -0.9857975245, -0.9863080978, -0.9868093729, -0.9873014092, -0.9877841473, -0.988257587, -0.9887216687, -0.9891765118, -0.9896219969, -0.9900581837, -0.9904850721, -0.9909026623, -0.9913108349, -0.9917097688, -0.9920992851, -0.9924795628, -0.9928504229, -0.993211925, -0.9935641289, -0.9939069748, -0.9942404628, -0.9945645928, -0.9948793054, -0.9951847196, -0.9954807758, -0.9957674146, -0.9960446954, -0.9963126183, -0.9965711236, -0.996820271, -0.9970600605, -0.9972904325, -0.9975114465, -0.997723043, -0.9979252815, -0.9981181026, -0.9983015656, -0.9984755516, -0.9986402392, -0.9987954497, -0.9989413023, -0.9990777373, -0.9992047548, -0.9993223548, -0.9994305968, -0.9995294213, -0.9996188283, -0.9996988177, -0.9997693896, -0.9998306036, -0.9998823404, -0.9999247193, -0.9999576211, -0.9999811649, -0.9999952912}; -constant float tc01[512] = {1, 0.9999988079, 0.9999952912, 0.9999893904, 0.9999811649, 0.9999706149, 0.9999576211, 0.9999423623, 0.9999247193, 0.9999046922, 0.9998823404, 0.9998576641, 0.9998306036, 0.9998011589, 0.9997693896, 0.9997352958, 0.9996988177, 0.9996600151, 0.9996188283, 0.9995753169, 0.9995294213, 0.9994812012, 0.9994305968, 0.9993776679, 0.9993223548, 0.9992647767, 0.9992047548, 0.9991424084, 0.9990777373, 0.9990106821, 0.9989413023, 0.9988695383, 0.9987954497, 0.9987190366, 0.9986402392, 0.9985590577, 0.9984755516, 0.9983897209, 0.9983015656, 0.9982110262, 0.9981181026, 0.9980228543, 0.9979252815, 0.9978253245, 0.997723043, 0.9976184368, 0.9975114465, 0.9974021316, 0.9972904325, 0.9971764088, 0.9970600605, 0.996941328, 0.996820271, 0.9966968894, 0.9965711236, 0.9964430332, 0.9963126183, 0.9961798191, 0.9960446954, 0.9959072471, 0.9957674146, 0.9956252575, 0.9954807758, 0.99533391, 0.9951847196, 0.9950332046, 0.9948793054, 0.9947231412, 0.9945645928, 
0.9944036603, 0.9942404628, 0.9940748811, 0.9939069748, 0.9937367439, 0.9935641289, 0.9933891892, 0.993211925, 0.9930323362, 0.9928504229, 0.9926661253, 0.9924795628, 0.992290616, 0.9920992851, 0.9919056892, 0.9917097688, 0.9915114641, 0.9913108349, 0.9911079407, 0.9909026623, 0.9906949997, 0.9904850721, 0.99027282, 0.9900581837, 0.9898412824, 0.9896219969, 0.9894004464, 0.9891765118, 0.9889502525, 0.9887216687, 0.9884908199, 0.988257587, 0.9880220294, 0.9877841473, 0.9875439405, 0.9873014092, 0.9870565534, 0.9868093729, 0.9865599275, 0.9863080978, 0.9860539436, 0.9857975245, 0.9855387211, 0.9852776527, 0.9850142598, 0.9847484827, 0.9844804406, 0.9842100739, 0.9839374423, 0.9836624265, 0.9833850861, 0.9831054807, 0.9828235507, 0.9825392962, 0.982252717, 0.9819638729, 0.9816727042, 0.9813792109, 0.9810833931, 0.9807852507, 0.9804848433, 0.9801821113, 0.9798771143, 0.9795697927, 0.9792601466, 0.9789481759, 0.9786339402, 0.97831738, 0.9779984951, 0.9776773453, 0.9773538709, 0.9770281315, 0.9767000675, 0.9763697386, 0.9760370851, 0.975702107, 0.9753648639, 0.9750253558, 0.9746835232, 0.974339366, 0.9739929438, 0.9736442566, 0.9732932448, 0.9729399681, 0.9725843668, 0.9722265005, 0.9718663096, 0.9715039134, 0.971139133, 0.9707721472, 0.9704028368, 0.9700312614, 0.9696573615, 0.9692812562, 0.9689028263, 0.9685220718, 0.968139112, 0.9677538276, 0.9673662782, 0.9669764638, 0.9665843844, 0.9661899805, 0.9657933712, 0.9653944373, 0.9649932384, 0.9645897746, 0.9641840458, 0.963776052, 0.9633657932, 0.9629532695, 0.9625384808, 0.9621214271, 0.9617020488, 0.9612804651, 0.9608566165, 0.9604305029, 0.9600021243, 0.9595715404, 0.9591386318, 0.9587034583, 0.9582660794, 0.9578264356, 0.9573845267, 0.9569403529, 0.9564939141, 0.95604527, 0.9555943608, 0.9551411867, 0.9546857476, 0.9542281032, 0.9537681937, 0.9533060193, 0.9528416395, 0.9523749948, 0.9519061446, 0.9514350295, 0.9509616494, 0.950486064, 0.9500082731, 0.9495281577, 0.9490458965, 0.9485613704, 0.9480745792, 0.9475855827, 
0.9470943809, 0.946600914, 0.9461052418, 0.9456073046, 0.9451072216, 0.9446048141, 0.9441002607, 0.9435934424, 0.9430844188, 0.9425731897, 0.9420597553, 0.9415440559, 0.9410261512, 0.940506041, 0.9399837255, 0.9394592047, 0.9389324784, 0.9384035468, 0.9378723502, 0.9373390079, 0.9368034601, 0.9362656474, 0.9357256889, 0.9351835251, 0.9346391559, 0.9340925217, 0.9335438013, 0.932992816, 0.9324396253, 0.9318842888, 0.9313266873, 0.9307669401, 0.9302050471, 0.9296408892, 0.9290745854, 0.9285060763, 0.9279354215, 0.9273625016, 0.9267874956, 0.9262102246, 0.9256308079, 0.9250492454, 0.9244654775, 0.9238795042, 0.9232914448, 0.9227011204, 0.9221086502, 0.9215140343, 0.920917213, 0.9203183055, 0.919717133, 0.9191138744, 0.9185084105, 0.9179008007, 0.9172909856, 0.9166790843, 0.9160649776, 0.9154487252, 0.914830327, 0.9142097831, 0.9135870337, 0.9129621983, 0.9123351574, 0.9117060304, 0.9110747576, 0.9104412794, 0.9098057151, 0.909168005, 0.9085280895, 0.9078860879, 0.9072420001, 0.9065957069, 0.905947268, 0.9052967429, 0.9046440721, 0.903989315, 0.9033323526, 0.9026733041, 0.9020121694, 0.9013488293, 0.900683403, 0.9000158906, 0.8993462324, 0.8986744881, 0.898000598, 0.8973245621, 0.8966464996, 0.8959662318, 0.8952839375, 0.8945994973, 0.893912971, 0.893224299, 0.8925335407, 0.8918406963, 0.8911457658, 0.8904487491, 0.8897495866, 0.8890483379, 0.8883450627, 0.8876396418, 0.8869321346, 0.8862225413, 0.8855108619, 0.8847970963, 0.8840812445, 0.8833633661, 0.882643342, 0.8819212914, 0.8811970949, 0.8804708719, 0.8797426224, 0.8790122271, 0.8782798052, 0.8775452971, 0.8768087029, 0.8760700822, 0.8753293753, 0.8745866418, 0.8738418221, 0.8730949759, 0.8723460436, 0.8715950847, 0.8708420396, 0.8700869679, 0.8693298697, 0.8685706854, 0.8678094745, 0.867046237, 0.866280973, 0.8655136228, 0.864744246, 0.8639728427, 0.8631994128, 0.8624239564, 0.8616464734, 0.8608669639, 0.8600853682, 0.8593018055, 0.8585162163, 0.8577286005, 0.8569389582, 0.8561473489, 0.8553536534, 0.854557991, 
0.8537603021, 0.8529605865, 0.8521589041, 0.851355195, 0.8505494595, 0.8497417569, 0.8489320278, 0.8481203318, 0.8473066092, 0.8464909196, 0.8456732631, 0.84485358, 0.8440318704, 0.8432082534, 0.8423826098, 0.8415549994, 0.8407253623, 0.8398938179, 0.8390602469, 0.838224709, 0.8373872042, 0.8365477324, 0.8357062936, 0.8348628879, 0.8340175152, 0.8331701756, 0.832320869, 0.8314695954, 0.8306164145, 0.8297612071, 0.8289040923, 0.8280450702, 0.8271840215, 0.8263210654, 0.8254561424, 0.8245893121, 0.8237205148, 0.8228498101, 0.8219771385, 0.8211025, 0.8202259541, 0.8193475008, 0.8184671402, 0.8175848126, 0.8167005777, 0.8158144355, 0.8149263263, 0.8140363097, 0.8131443858, 0.8122506142, 0.8113548756, 0.81045717, 0.8095576167, 0.8086561561, 0.8077528477, 0.8068475723, 0.8059403896, 0.8050313592, 0.8041203618, 0.8032075167, 0.8022928238, 0.801376164, 0.8004576564, 0.7995372415, 0.7986149788, 0.7976908684, 0.796764791, 0.7958369255, 0.7949071527, 0.7939754725, 0.7930419445, 0.7921065688, 0.7911693454, 0.7902302146, 0.7892892361, 0.7883464098, 0.7874017358, 0.786455214, 0.7855068445, 0.7845565677, 0.7836045027, 0.7826505899, 0.7816948295, 0.7807372212, 0.7797777653, 0.7788165212, 0.7778534293, 0.7768884897, 0.7759217024, 0.7749531269, 0.7739827037, 0.7730104327, 0.7720363736, 0.7710605264, 0.7700828314, 0.7691033483, 0.7681220174, 0.7671388984, 0.7661539912, 0.7651672363, 0.7641787529, 0.7631884217, 0.7621963024, 0.761202395, 0.7602066994, 0.7592092156, 0.7582098842, 0.7572088242, 0.756205976, 0.7552013993, 0.7541949749, 0.7531868219, 0.7521768212, 0.7511651516, 0.7501516342, 0.7491363883, 0.7481193542, 0.7471005917, 0.7460801005, 0.7450577617, 0.7440337539, 0.7430079579, 0.7419804335, 0.7409511209, 0.7399200797, 0.73888731, 0.7378528118, 0.7368165851, 0.7357785702, 0.7347388864, 0.7336974144, 0.7326542735, 0.7316094041, 0.7305627465, 0.72951442, 0.728464365, 0.727412641, 0.726359129, 0.7253039479, 0.724247098, 0.7231884599, 0.7221282125, 0.7210661769, 0.720002532, 
0.718937099, 0.7178700566, 0.7168012857, 0.7157308459, 0.7146586776, 0.7135848403, 0.7125093937, 0.7114322186, 0.7103533745, 0.7092728019, 0.7081906199}; -constant float tc04[512] = {0.7071067691, 0.7060212493, 0.7049340606, 0.7038452625, 0.7027547359, 0.7016626, 0.7005687952, 0.6994733214, 0.6983762383, 0.6972774863, 0.696177125, 0.6950750947, 0.6939714551, 0.6928661466, 0.6917592287, 0.6906507015, 0.689540565, 0.6884287596, 0.6873153448, 0.6862003207, 0.6850836873, 0.683965385, 0.6828455329, 0.6817240715, 0.6806010008, 0.6794763207, 0.6783500314, 0.6772221923, 0.6760926843, 0.6749616265, 0.6738290191, 0.6726947427, 0.6715589762, 0.6704215407, 0.6692826152, 0.6681420207, 0.6669999361, 0.6658562422, 0.6647109985, 0.6635641456, 0.6624158025, 0.6612658501, 0.6601143479, 0.6589612961, 0.6578066945, 0.6566505432, 0.6554928422, 0.6543335915, 0.6531728506, 0.65201056, 0.6508466601, 0.6496813297, 0.64851439, 0.6473459601, 0.6461760402, 0.6450045109, 0.6438315511, 0.6426570415, 0.6414810419, 0.6403034925, 0.6391244531, 0.6379439235, 0.6367618442, 0.6355783343, 0.6343932748, 0.6332067847, 0.6320187449, 0.630829215, 0.6296382546, 0.6284457445, 0.6272518039, 0.6260563731, 0.6248595119, 0.6236611009, 0.6224612594, 0.6212599874, 0.6200572252, 0.618852973, 0.6176472902, 0.616440177, 0.6152315736, 0.6140215397, 0.6128100753, 0.6115971804, 0.6103827953, 0.6091670394, 0.6079497933, 0.6067311168, 0.6055110693, 0.6042895317, 0.6030666232, 0.6018422246, 0.6006164551, 0.5993893147, 0.5981606841, 0.5969306827, 0.5956993103, 0.5944665074, 0.5932322741, 0.5919966698, 0.5907596946, 0.5895212889, 0.5882815719, 0.5870403647, 0.5857978463, 0.584553957, 0.5833086371, 0.582062006, 0.5808139443, 0.5795645714, 0.5783137679, 0.5770616531, 0.5758081675, 0.5745533705, 0.573297143, 0.5720396042, 0.5707807541, 0.5695205331, 0.5682589412, 0.566996038, 0.5657318234, 0.564466238, 0.5631993413, 0.5619311333, 0.5606615543, 0.5593907237, 0.5581185222, 0.5568450093, 0.5555702448, 0.5542941093, 0.5530167222, 
0.5517379642, 0.5504579544, 0.5491766334, 0.5478940606, 0.5466101766, 0.5453249812, 0.5440385342, 0.5427507758, 0.5414617658, 0.5401714444, 0.538879931, 0.5375870466, 0.5362929702, 0.534997642, 0.5337010026, 0.5324031115, 0.5311040282, 0.5298036337, 0.5285019875, 0.5271991491, 0.5258949995, 0.5245896578, 0.523283124, 0.5219752789, 0.5206662416, 0.5193560123, 0.5180445313, 0.5167317986, 0.5154178739, 0.514102757, 0.5127863884, 0.5114688277, 0.510150075, 0.5088301301, 0.5075089931, 0.5061866641, 0.5048630834, 0.5035383701, 0.5022124648, 0.5008853674, 0.4995571077, 0.4982276559, 0.4968970418, 0.4955652654, 0.4942322969, 0.492898196, 0.4915629029, 0.4902264774, 0.4888888896, 0.4875501692, 0.4862102866, 0.4848692417, 0.4835270643, 0.4821837842, 0.4808393419, 0.479493767, 0.4781470597, 0.4767992198, 0.4754502773, 0.4741002023, 0.4727490246, 0.4713967443, 0.4700433314, 0.4686888158, 0.4673331976, 0.4659765065, 0.4646186829, 0.4632597864, 0.4618997872, 0.4605387151, 0.4591765404, 0.4578132927, 0.4564489722, 0.4550835788, 0.4537171125, 0.4523495734, 0.4509809911, 0.449611336, 0.448240608, 0.4468688369, 0.4454960227, 0.4441221356, 0.4427472353, 0.4413712621, 0.4399942756, 0.438616246, 0.4372371733, 0.4358570874, 0.4344759583, 0.433093816, 0.4317106605, 0.4303264916, 0.4289412796, 0.4275550842, 0.4261678755, 0.4247796834, 0.4233904779, 0.4220002592, 0.4206090868, 0.4192169011, 0.4178237021, 0.4164295495, 0.4150344133, 0.4136383235, 0.4122412205, 0.4108431637, 0.4094441533, 0.4080441594, 0.4066432118, 0.4052413106, 0.4038384557, 0.4024346471, 0.4010298848, 0.3996241987, 0.3982175589, 0.3968099952, 0.3954014778, 0.3939920366, 0.3925816715, 0.3911703825, 0.3897581697, 0.3883450329, 0.3869310021, 0.3855160475, 0.3841001987, 0.3826834261, 0.3812657595, 0.3798471987, 0.3784277439, 0.3770074248, 0.3755861819, 0.3741640747, 0.3727410734, 0.3713172078, 0.3698924482, 0.3684668243, 0.3670403361, 0.3656129837, 0.3641847968, 0.3627557158, 0.3613258004, 0.3598950505, 0.3584634066, 
0.3570309579, 0.3555976748, 0.3541635275, 0.3527285457, 0.3512927592, 0.3498561382, 0.3484186828, 0.3469804227, 0.3455413282, 0.344101429, 0.3426607251, 0.3412192166, 0.3397768736, 0.3383337557, 0.336889863, 0.3354451358, 0.3339996636, 0.3325533569, 0.3311063051, 0.3296584487, 0.3282098472, 0.3267604411, 0.3253102899, 0.3238593638, 0.3224076927, 0.3209552467, 0.3195020258, 0.3180480897, 0.3165933788, 0.3151379228, 0.3136817515, 0.3122248054, 0.310767144, 0.3093087673, 0.3078496456, 0.3063898087, 0.3049292266, 0.3034679592, 0.3020059466, 0.3005432487, 0.2990798354, 0.2976157069, 0.296150893, 0.2946853638, 0.2932191491, 0.291752249, 0.2902846634, 0.2888164222, 0.2873474658, 0.2858778238, 0.2844075263, 0.282936573, 0.2814649343, 0.27999264, 0.27851969, 0.2770460844, 0.2755718231, 0.2740969062, 0.2726213634, 0.271145165, 0.2696683109, 0.2681908607, 0.266712755, 0.2652340233, 0.2637546659, 0.2622747123, 0.2607941031, 0.2593129277, 0.2578310966, 0.2563486695, 0.2548656464, 0.2533820271, 0.2518978119, 0.2504130006, 0.2489276081, 0.2474416196, 0.24595505, 0.2444678992, 0.2429801822, 0.241491884, 0.2400030196, 0.2385135889, 0.2370236069, 0.2355330586, 0.234041959, 0.2325503081, 0.2310581058, 0.2295653671, 0.228072077, 0.2265782654, 0.2250839174, 0.2235890329, 0.2220936269, 0.2205976844, 0.2191012353, 0.2176042795, 0.2161068022, 0.2146088183, 0.2131103128, 0.2116113305, 0.2101118416, 0.208611846, 0.2071113735, 0.2056104094, 0.2041089684, 0.2026070356, 0.201104641, 0.1996017545, 0.1980984062, 0.1965945959, 0.1950903237, 0.1935855895, 0.1920803934, 0.1905747503, 0.1890686601, 0.1875621229, 0.1860551536, 0.1845477372, 0.1830398887, 0.1815316081, 0.1800228953, 0.1785137653, 0.1770042181, 0.1754942536, 0.1739838719, 0.1724730879, 0.1709618866, 0.169450298, 0.167938292, 0.1664258987, 0.1649131179, 0.1633999497, 0.161886394, 0.1603724509, 0.1588581502, 0.1573434621, 0.1558284014, 0.1543129683, 0.1527971923, 0.1512810439, 0.1497645378, 0.1482476741, 0.1467304677, 0.1452129185, 
0.1436950266, 0.1421768069, 0.1406582445, 0.1391393393, 0.1376201212, 0.1361005753, 0.1345807016, 0.1330605298, 0.1315400302, 0.1300192177, 0.1284981072, 0.1269766986, 0.1254549772, 0.1239329726, 0.1224106774, 0.1208880842, 0.1193652153, 0.1178420633, 0.1163186282, 0.1147949249, 0.1132709533, 0.1117467135, 0.1102222055, 0.1086974442, 0.1071724221, 0.1056471542, 0.1041216329, 0.1025958657, 0.1010698602, 0.09954361618, 0.09801714122, 0.09649042785, 0.09496349841, 0.09343633801, 0.09190895408, 0.09038136154, 0.08885355294, 0.08732553571, 0.08579730988, 0.08426889032, 0.08274026215, 0.08121144772, 0.07968243957, 0.07815324515, 0.07662386447, 0.07509429753, 0.07356456667, 0.07203464955, 0.07050457597, 0.06897433102, 0.06744392216, 0.06591334939, 0.06438262761, 0.06285175681, 0.061320737, 0.05978957191, 0.05825826526, 0.05672682077, 0.05519524589, 0.05366353691, 0.05213170499, 0.05059975013, 0.04906767607, 0.04753548279, 0.04600318149, 0.04447077215, 0.0429382585, 0.04140564054, 0.03987292573, 0.03834012151, 0.03680722415, 0.03527423739, 0.0337411724, 0.03220802546, 0.030674804, 0.02914150804, 0.02760814503, 0.02607471868, 0.02454122901, 0.02300768159, 0.02147408016, 0.01994042844, 0.01840673015, 0.01687298715, 0.01533920597, 0.01380538847, 0.01227153838, 0.01073765941, 0.009203754365, 0.007669828832, 0.006135884672, 0.004601926077, 0.003067956772, 0.001533980132}; -constant float tc02[512] = {1, 0.9999893904, 0.9999576211, 0.9999046922, 0.9998306036, 0.9997352958, 0.9996188283, 0.9994812012, 0.9993223548, 0.9991424084, 0.9989413023, 0.9987190366, 0.9984755516, 0.9982110262, 0.9979252815, 0.9976184368, 0.9972904325, 0.996941328, 0.9965711236, 0.9961798191, 0.9957674146, 0.99533391, 0.9948793054, 0.9944036603, 0.9939069748, 0.9933891892, 0.9928504229, 0.992290616, 0.9917097688, 0.9911079407, 0.9904850721, 0.9898412824, 0.9891765118, 0.9884908199, 0.9877841473, 0.9870565534, 0.9863080978, 0.9855387211, 0.9847484827, 0.9839374423, 0.9831054807, 0.982252717, 0.9813792109, 
0.9804848433, 0.9795697927, 0.9786339402, 0.9776773453, 0.9767000675, 0.975702107, 0.9746835232, 0.9736442566, 0.9725843668, 0.9715039134, 0.9704028368, 0.9692812562, 0.968139112, 0.9669764638, 0.9657933712, 0.9645897746, 0.9633657932, 0.9621214271, 0.9608566165, 0.9595715404, 0.9582660794, 0.9569403529, 0.9555943608, 0.9542281032, 0.9528416395, 0.9514350295, 0.9500082731, 0.9485613704, 0.9470943809, 0.9456073046, 0.9441002607, 0.9425731897, 0.9410261512, 0.9394592047, 0.9378723502, 0.9362656474, 0.9346391559, 0.932992816, 0.9313266873, 0.9296408892, 0.9279354215, 0.9262102246, 0.9244654775, 0.9227011204, 0.920917213, 0.9191138744, 0.9172909856, 0.9154487252, 0.9135870337, 0.9117060304, 0.9098057151, 0.9078860879, 0.905947268, 0.903989315, 0.9020121694, 0.9000158906, 0.898000598, 0.8959662318, 0.893912971, 0.8918406963, 0.8897495866, 0.8876396418, 0.8855108619, 0.8833633661, 0.8811970949, 0.8790122271, 0.8768087029, 0.8745866418, 0.8723460436, 0.8700869679, 0.8678094745, 0.8655136228, 0.8631994128, 0.8608669639, 0.8585162163, 0.8561473489, 0.8537603021, 0.851355195, 0.8489320278, 0.8464909196, 0.8440318704, 0.8415549994, 0.8390602469, 0.8365477324, 0.8340175152, 0.8314695954, 0.8289040923, 0.8263210654, 0.8237205148, 0.8211025, 0.8184671402, 0.8158144355, 0.8131443858, 0.81045717, 0.8077528477, 0.8050313592, 0.8022928238, 0.7995372415, 0.796764791, 0.7939754725, 0.7911693454, 0.7883464098, 0.7855068445, 0.7826505899, 0.7797777653, 0.7768884897, 0.7739827037, 0.7710605264, 0.7681220174, 0.7651672363, 0.7621963024, 0.7592092156, 0.756205976, 0.7531868219, 0.7501516342, 0.7471005917, 0.7440337539, 0.7409511209, 0.7378528118, 0.7347388864, 0.7316094041, 0.728464365, 0.7253039479, 0.7221282125, 0.718937099, 0.7157308459, 0.7125093937, 0.7092728019, 0.7060212493, 0.7027547359, 0.6994733214, 0.696177125, 0.6928661466, 0.689540565, 0.6862003207, 0.6828455329, 0.6794763207, 0.6760926843, 0.6726947427, 0.6692826152, 0.6658562422, 0.6624158025, 0.6589612961, 0.6554928422, 
0.65201056, 0.64851439, 0.6450045109, 0.6414810419, 0.6379439235, 0.6343932748, 0.630829215, 0.6272518039, 0.6236611009, 0.6200572252, 0.616440177, 0.6128100753, 0.6091670394, 0.6055110693, 0.6018422246, 0.5981606841, 0.5944665074, 0.5907596946, 0.5870403647, 0.5833086371, 0.5795645714, 0.5758081675, 0.5720396042, 0.5682589412, 0.564466238, 0.5606615543, 0.5568450093, 0.5530167222, 0.5491766334, 0.5453249812, 0.5414617658, 0.5375870466, 0.5337010026, 0.5298036337, 0.5258949995, 0.5219752789, 0.5180445313, 0.514102757, 0.510150075, 0.5061866641, 0.5022124648, 0.4982276559, 0.4942322969, 0.4902264774, 0.4862102866, 0.4821837842, 0.4781470597, 0.4741002023, 0.4700433314, 0.4659765065, 0.4618997872, 0.4578132927, 0.4537171125, 0.449611336, 0.4454960227, 0.4413712621, 0.4372371733, 0.433093816, 0.4289412796, 0.4247796834, 0.4206090868, 0.4164295495, 0.4122412205, 0.4080441594, 0.4038384557, 0.3996241987, 0.3954014778, 0.3911703825, 0.3869310021, 0.3826834261, 0.3784277439, 0.3741640747, 0.3698924482, 0.3656129837, 0.3613258004, 0.3570309579, 0.3527285457, 0.3484186828, 0.344101429, 0.3397768736, 0.3354451358, 0.3311063051, 0.3267604411, 0.3224076927, 0.3180480897, 0.3136817515, 0.3093087673, 0.3049292266, 0.3005432487, 0.296150893, 0.291752249, 0.2873474658, 0.282936573, 0.27851969, 0.2740969062, 0.2696683109, 0.2652340233, 0.2607941031, 0.2563486695, 0.2518978119, 0.2474416196, 0.2429801822, 0.2385135889, 0.234041959, 0.2295653671, 0.2250839174, 0.2205976844, 0.2161068022, 0.2116113305, 0.2071113735, 0.2026070356, 0.1980984062, 0.1935855895, 0.1890686601, 0.1845477372, 0.1800228953, 0.1754942536, 0.1709618866, 0.1664258987, 0.161886394, 0.1573434621, 0.1527971923, 0.1482476741, 0.1436950266, 0.1391393393, 0.1345807016, 0.1300192177, 0.1254549772, 0.1208880842, 0.1163186282, 0.1117467135, 0.1071724221, 0.1025958657, 0.09801714122, 0.09343633801, 0.08885355294, 0.08426889032, 0.07968243957, 0.07509429753, 0.07050457597, 0.06591334939, 0.061320737, 0.05672682077, 
0.05213170499, 0.04753548279, 0.0429382585, 0.03834012151, 0.0337411724, 0.02914150804, 0.02454122901, 0.01994042844, 0.01533920597, 0.01073765941, 0.006135884672, 0.001533980132, -0.003067956772, -0.007669828832, -0.01227153838, -0.01687298715, -0.02147408016, -0.02607471868, -0.030674804, -0.03527423739, -0.03987292573, -0.04447077215, -0.04906767607, -0.05366353691, -0.05825826526, -0.06285175681, -0.06744392216, -0.07203464955, -0.07662386447, -0.08121144772, -0.08579730988, -0.09038136154, -0.09496349841, -0.09954361618, -0.1041216329, -0.1086974442, -0.1132709533, -0.1178420633, -0.1224106774, -0.1269766986, -0.1315400302, -0.1361005753, -0.1406582445, -0.1452129185, -0.1497645378, -0.1543129683, -0.1588581502, -0.1633999497, -0.167938292, -0.1724730879, -0.1770042181, -0.1815316081, -0.1860551536, -0.1905747503, -0.1950903237, -0.1996017545, -0.2041089684, -0.208611846, -0.2131103128, -0.2176042795, -0.2220936269, -0.2265782654, -0.2310581058, -0.2355330586, -0.2400030196, -0.2444678992, -0.2489276081, -0.2533820271, -0.2578310966, -0.2622747123, -0.266712755, -0.271145165, -0.2755718231, -0.27999264, -0.2844075263, -0.2888164222, -0.2932191491, -0.2976157069, -0.3020059466, -0.3063898087, -0.310767144, -0.3151379228, -0.3195020258, -0.3238593638, -0.3282098472, -0.3325533569, -0.336889863, -0.3412192166, -0.3455413282, -0.3498561382, -0.3541635275, -0.3584634066, -0.3627557158, -0.3670403361, -0.3713172078, -0.3755861819, -0.3798471987, -0.3841001987, -0.3883450329, -0.3925816715, -0.3968099952, -0.4010298848, -0.4052413106, -0.4094441533, -0.4136383235, -0.4178237021, -0.4220002592, -0.4261678755, -0.4303264916, -0.4344759583, -0.438616246, -0.4427472353, -0.4468688369, -0.4509809911, -0.4550835788, -0.4591765404, -0.4632597864, -0.4673331976, -0.4713967443, -0.4754502773, -0.479493767, -0.4835270643, -0.4875501692, -0.4915629029, -0.4955652654, -0.4995571077, -0.5035383701, -0.5075089931, -0.5114688277, -0.5154178739, -0.5193560123, -0.523283124, 
-0.5271991491, -0.5311040282, -0.534997642, -0.538879931, -0.5427507758, -0.5466101766, -0.5504579544, -0.5542941093, -0.5581185222, -0.5619311333, -0.5657318234, -0.5695205331, -0.573297143, -0.5770616531, -0.5808139443, -0.584553957, -0.5882815719, -0.5919966698, -0.5956993103, -0.5993893147, -0.6030666232, -0.6067311168, -0.6103827953, -0.6140215397, -0.6176472902, -0.6212599874, -0.6248595119, -0.6284457445, -0.6320187449, -0.6355783343, -0.6391244531, -0.6426570415, -0.6461760402, -0.6496813297, -0.6531728506, -0.6566505432, -0.6601143479, -0.6635641456, -0.6669999361, -0.6704215407, -0.6738290191, -0.6772221923, -0.6806010008, -0.683965385, -0.6873153448, -0.6906507015, -0.6939714551, -0.6972774863, -0.7005687952, -0.7038452625}; -constant float tc05[512] = {-0.7071067691, -0.7103533745, -0.7135848403, -0.7168012857, -0.720002532, -0.7231884599, -0.726359129, -0.72951442, -0.7326542735, -0.7357785702, -0.73888731, -0.7419804335, -0.7450577617, -0.7481193542, -0.7511651516, -0.7541949749, -0.7572088242, -0.7602066994, -0.7631884217, -0.7661539912, -0.7691033483, -0.7720363736, -0.7749531269, -0.7778534293, -0.7807372212, -0.7836045027, -0.786455214, -0.7892892361, -0.7921065688, -0.7949071527, -0.7976908684, -0.8004576564, -0.8032075167, -0.8059403896, -0.8086561561, -0.8113548756, -0.8140363097, -0.8167005777, -0.8193475008, -0.8219771385, -0.8245893121, -0.8271840215, -0.8297612071, -0.832320869, -0.8348628879, -0.8373872042, -0.8398938179, -0.8423826098, -0.84485358, -0.8473066092, -0.8497417569, -0.8521589041, -0.854557991, -0.8569389582, -0.8593018055, -0.8616464734, -0.8639728427, -0.866280973, -0.8685706854, -0.8708420396, -0.8730949759, -0.8753293753, -0.8775452971, -0.8797426224, -0.8819212914, -0.8840812445, -0.8862225413, -0.8883450627, -0.8904487491, -0.8925335407, -0.8945994973, -0.8966464996, -0.8986744881, -0.900683403, -0.9026733041, -0.9046440721, -0.9065957069, -0.9085280895, -0.9104412794, -0.9123351574, -0.9142097831, -0.9160649776, 
-0.9179008007, -0.919717133, -0.9215140343, -0.9232914448, -0.9250492454, -0.9267874956, -0.9285060763, -0.9302050471, -0.9318842888, -0.9335438013, -0.9351835251, -0.9368034601, -0.9384035468, -0.9399837255, -0.9415440559, -0.9430844188, -0.9446048141, -0.9461052418, -0.9475855827, -0.9490458965, -0.950486064, -0.9519061446, -0.9533060193, -0.9546857476, -0.95604527, -0.9573845267, -0.9587034583, -0.9600021243, -0.9612804651, -0.9625384808, -0.963776052, -0.9649932384, -0.9661899805, -0.9673662782, -0.9685220718, -0.9696573615, -0.9707721472, -0.9718663096, -0.9729399681, -0.9739929438, -0.9750253558, -0.9760370851, -0.9770281315, -0.9779984951, -0.9789481759, -0.9798771143, -0.9807852507, -0.9816727042, -0.9825392962, -0.9833850861, -0.9842100739, -0.9850142598, -0.9857975245, -0.9865599275, -0.9873014092, -0.9880220294, -0.9887216687, -0.9894004464, -0.9900581837, -0.9906949997, -0.9913108349, -0.9919056892, -0.9924795628, -0.9930323362, -0.9935641289, -0.9940748811, -0.9945645928, -0.9950332046, -0.9954807758, -0.9959072471, -0.9963126183, -0.9966968894, -0.9970600605, -0.9974021316, -0.997723043, -0.9980228543, -0.9983015656, -0.9985590577, -0.9987954497, -0.9990106821, -0.9992047548, -0.9993776679, -0.9995294213, -0.9996600151, -0.9997693896, -0.9998576641, -0.9999247193, -0.9999706149, -0.9999952912, -0.9999988079, -0.9999811649, -0.9999423623, -0.9998823404, -0.9998011589, -0.9996988177, -0.9995753169, -0.9994305968, -0.9992647767, -0.9990777373, -0.9988695383, -0.9986402392, -0.9983897209, -0.9981181026, -0.9978253245, -0.9975114465, -0.9971764088, -0.996820271, -0.9964430332, -0.9960446954, -0.9956252575, -0.9951847196, -0.9947231412, -0.9942404628, -0.9937367439, -0.993211925, -0.9926661253, -0.9920992851, -0.9915114641, -0.9909026623, -0.99027282, -0.9896219969, -0.9889502525, -0.988257587, -0.9875439405, -0.9868093729, -0.9860539436, -0.9852776527, -0.9844804406, -0.9836624265, -0.9828235507, -0.9819638729, -0.9810833931, -0.9801821113, -0.9792601466, 
-0.97831738, -0.9773538709, -0.9763697386, -0.9753648639, -0.974339366, -0.9732932448, -0.9722265005, -0.971139133, -0.9700312614, -0.9689028263, -0.9677538276, -0.9665843844, -0.9653944373, -0.9641840458, -0.9629532695, -0.9617020488, -0.9604305029, -0.9591386318, -0.9578264356, -0.9564939141, -0.9551411867, -0.9537681937, -0.9523749948, -0.9509616494, -0.9495281577, -0.9480745792, -0.946600914, -0.9451072216, -0.9435934424, -0.9420597553, -0.940506041, -0.9389324784, -0.9373390079, -0.9357256889, -0.9340925217, -0.9324396253, -0.9307669401, -0.9290745854, -0.9273625016, -0.9256308079, -0.9238795042, -0.9221086502, -0.9203183055, -0.9185084105, -0.9166790843, -0.914830327, -0.9129621983, -0.9110747576, -0.909168005, -0.9072420001, -0.9052967429, -0.9033323526, -0.9013488293, -0.8993462324, -0.8973245621, -0.8952839375, -0.893224299, -0.8911457658, -0.8890483379, -0.8869321346, -0.8847970963, -0.882643342, -0.8804708719, -0.8782798052, -0.8760700822, -0.8738418221, -0.8715950847, -0.8693298697, -0.867046237, -0.864744246, -0.8624239564, -0.8600853682, -0.8577286005, -0.8553536534, -0.8529605865, -0.8505494595, -0.8481203318, -0.8456732631, -0.8432082534, -0.8407253623, -0.838224709, -0.8357062936, -0.8331701756, -0.8306164145, -0.8280450702, -0.8254561424, -0.8228498101, -0.8202259541, -0.8175848126, -0.8149263263, -0.8122506142, -0.8095576167, -0.8068475723, -0.8041203618, -0.801376164, -0.7986149788, -0.7958369255, -0.7930419445, -0.7902302146, -0.7874017358, -0.7845565677, -0.7816948295, -0.7788165212, -0.7759217024, -0.7730104327, -0.7700828314, -0.7671388984, -0.7641787529, -0.761202395, -0.7582098842, -0.7552013993, -0.7521768212, -0.7491363883, -0.7460801005, -0.7430079579, -0.7399200797, -0.7368165851, -0.7336974144, -0.7305627465, -0.727412641, -0.724247098, -0.7210661769, -0.7178700566, -0.7146586776, -0.7114322186, -0.7081906199, -0.7049340606, -0.7016626, -0.6983762383, -0.6950750947, -0.6917592287, -0.6884287596, -0.6850836873, -0.6817240715, 
-0.6783500314, -0.6749616265, -0.6715589762, -0.6681420207, -0.6647109985, -0.6612658501, -0.6578066945, -0.6543335915, -0.6508466601, -0.6473459601, -0.6438315511, -0.6403034925, -0.6367618442, -0.6332067847, -0.6296382546, -0.6260563731, -0.6224612594, -0.618852973, -0.6152315736, -0.6115971804, -0.6079497933, -0.6042895317, -0.6006164551, -0.5969306827, -0.5932322741, -0.5895212889, -0.5857978463, -0.582062006, -0.5783137679, -0.5745533705, -0.5707807541, -0.566996038, -0.5631993413, -0.5593907237, -0.5555702448, -0.5517379642, -0.5478940606, -0.5440385342, -0.5401714444, -0.5362929702, -0.5324031115, -0.5285019875, -0.5245896578, -0.5206662416, -0.5167317986, -0.5127863884, -0.5088301301, -0.5048630834, -0.5008853674, -0.4968970418, -0.492898196, -0.4888888896, -0.4848692417, -0.4808393419, -0.4767992198, -0.4727490246, -0.4686888158, -0.4646186829, -0.4605387151, -0.4564489722, -0.4523495734, -0.448240608, -0.4441221356, -0.4399942756, -0.4358570874, -0.4317106605, -0.4275550842, -0.4233904779, -0.4192169011, -0.4150344133, -0.4108431637, -0.4066432118, -0.4024346471, -0.3982175589, -0.3939920366, -0.3897581697, -0.3855160475, -0.3812657595, -0.3770074248, -0.3727410734, -0.3684668243, -0.3641847968, -0.3598950505, -0.3555976748, -0.3512927592, -0.3469804227, -0.3426607251, -0.3383337557, -0.3339996636, -0.3296584487, -0.3253102899, -0.3209552467, -0.3165933788, -0.3122248054, -0.3078496456, -0.3034679592, -0.2990798354, -0.2946853638, -0.2902846634, -0.2858778238, -0.2814649343, -0.2770460844, -0.2726213634, -0.2681908607, -0.2637546659, -0.2593129277, -0.2548656464, -0.2504130006, -0.24595505, -0.241491884, -0.2370236069, -0.2325503081, -0.228072077, -0.2235890329, -0.2191012353, -0.2146088183, -0.2101118416, -0.2056104094, -0.201104641, -0.1965945959, -0.1920803934, -0.1875621229, -0.1830398887, -0.1785137653, -0.1739838719, -0.169450298, -0.1649131179, -0.1603724509, -0.1558284014, -0.1512810439, -0.1467304677, -0.1421768069, -0.1376201212, -0.1330605298, 
-0.1284981072, -0.1239329726, -0.1193652153, -0.1147949249, -0.1102222055, -0.1056471542, -0.1010698602, -0.09649042785, -0.09190895408, -0.08732553571, -0.08274026215, -0.07815324515, -0.07356456667, -0.06897433102, -0.06438262761, -0.05978957191, -0.05519524589, -0.05059975013, -0.04600318149, -0.04140564054, -0.03680722415, -0.03220802546, -0.02760814503, -0.02300768159, -0.01840673015, -0.01380538847, -0.009203754365, -0.004601926077}; -constant float tc10[512] = {1, 0.9999247193, 0.9996988177, 0.9993223548, 0.9987954497, 0.9981181026, 0.9972904325, 0.9963126183, 0.9951847196, 0.9939069748, 0.9924795628, 0.9909026623, 0.9891765118, 0.9873014092, 0.9852776527, 0.9831054807, 0.9807852507, 0.97831738, 0.975702107, 0.9729399681, 0.9700312614, 0.9669764638, 0.963776052, 0.9604305029, 0.9569403529, 0.9533060193, 0.9495281577, 0.9456073046, 0.9415440559, 0.9373390079, 0.932992816, 0.9285060763, 0.9238795042, 0.9191138744, 0.9142097831, 0.909168005, 0.903989315, 0.8986744881, 0.893224299, 0.8876396418, 0.8819212914, 0.8760700822, 0.8700869679, 0.8639728427, 0.8577286005, 0.851355195, 0.84485358, 0.838224709, 0.8314695954, 0.8245893121, 0.8175848126, 0.81045717, 0.8032075167, 0.7958369255, 0.7883464098, 0.7807372212, 0.7730104327, 0.7651672363, 0.7572088242, 0.7491363883, 0.7409511209, 0.7326542735, 0.724247098, 0.7157308459, 0.7071067691, 0.6983762383, 0.689540565, 0.6806010008, 0.6715589762, 0.6624158025, 0.6531728506, 0.6438315511, 0.6343932748, 0.6248595119, 0.6152315736, 0.6055110693, 0.5956993103, 0.5857978463, 0.5758081675, 0.5657318234, 0.5555702448, 0.5453249812, 0.534997642, 0.5245896578, 0.514102757, 0.5035383701, 0.492898196, 0.4821837842, 0.4713967443, 0.4605387151, 0.449611336, 0.438616246, 0.4275550842, 0.4164295495, 0.4052413106, 0.3939920366, 0.3826834261, 0.3713172078, 0.3598950505, 0.3484186828, 0.336889863, 0.3253102899, 0.3136817515, 0.3020059466, 0.2902846634, 0.27851969, 0.266712755, 0.2548656464, 0.2429801822, 0.2310581058, 0.2191012353, 
0.2071113735, 0.1950903237, 0.1830398887, 0.1709618866, 0.1588581502, 0.1467304677, 0.1345807016, 0.1224106774, 0.1102222055, 0.09801714122, 0.08579730988, 0.07356456667, 0.061320737, 0.04906767607, 0.03680722415, 0.02454122901, 0.01227153838, 6.123234263e-17, -0.01227153838, -0.02454122901, -0.03680722415, -0.04906767607, -0.061320737, -0.07356456667, -0.08579730988, -0.09801714122, -0.1102222055, -0.1224106774, -0.1345807016, -0.1467304677, -0.1588581502, -0.1709618866, -0.1830398887, -0.1950903237, -0.2071113735, -0.2191012353, -0.2310581058, -0.2429801822, -0.2548656464, -0.266712755, -0.27851969, -0.2902846634, -0.3020059466, -0.3136817515, -0.3253102899, -0.336889863, -0.3484186828, -0.3598950505, -0.3713172078, -0.3826834261, -0.3939920366, -0.4052413106, -0.4164295495, -0.4275550842, -0.438616246, -0.449611336, -0.4605387151, -0.4713967443, -0.4821837842, -0.492898196, -0.5035383701, -0.514102757, -0.5245896578, -0.534997642, -0.5453249812, -0.5555702448, -0.5657318234, -0.5758081675, -0.5857978463, -0.5956993103, -0.6055110693, -0.6152315736, -0.6248595119, -0.6343932748, -0.6438315511, -0.6531728506, -0.6624158025, -0.6715589762, -0.6806010008, -0.689540565, -0.6983762383, -0.7071067691, -0.7157308459, -0.724247098, -0.7326542735, -0.7409511209, -0.7491363883, -0.7572088242, -0.7651672363, -0.7730104327, -0.7807372212, -0.7883464098, -0.7958369255, -0.8032075167, -0.81045717, -0.8175848126, -0.8245893121, -0.8314695954, -0.838224709, -0.84485358, -0.851355195, -0.8577286005, -0.8639728427, -0.8700869679, -0.8760700822, -0.8819212914, -0.8876396418, -0.893224299, -0.8986744881, -0.903989315, -0.909168005, -0.9142097831, -0.9191138744, -0.9238795042, -0.9285060763, -0.932992816, -0.9373390079, -0.9415440559, -0.9456073046, -0.9495281577, -0.9533060193, -0.9569403529, -0.9604305029, -0.963776052, -0.9669764638, -0.9700312614, -0.9729399681, -0.975702107, -0.97831738, -0.9807852507, -0.9831054807, -0.9852776527, -0.9873014092, -0.9891765118, -0.9909026623, 
-0.9924795628, -0.9939069748, -0.9951847196, -0.9963126183, -0.9972904325, -0.9981181026, -0.9987954497, -0.9993223548, -0.9996988177, -0.9999247193, 1, 0.9999247193, 0.9996988177, 0.9993223548, 0.9987954497, 0.9981181026, 0.9972904325, 0.9963126183, 0.9951847196, 0.9939069748, 0.9924795628, 0.9909026623, 0.9891765118, 0.9873014092, 0.9852776527, 0.9831054807, 0.9807852507, 0.97831738, 0.975702107, 0.9729399681, 0.9700312614, 0.9669764638, 0.963776052, 0.9604305029, 0.9569403529, 0.9533060193, 0.9495281577, 0.9456073046, 0.9415440559, 0.9373390079, 0.932992816, 0.9285060763, 0.9238795042, 0.9191138744, 0.9142097831, 0.909168005, 0.903989315, 0.8986744881, 0.893224299, 0.8876396418, 0.8819212914, 0.8760700822, 0.8700869679, 0.8639728427, 0.8577286005, 0.851355195, 0.84485358, 0.838224709, 0.8314695954, 0.8245893121, 0.8175848126, 0.81045717, 0.8032075167, 0.7958369255, 0.7883464098, 0.7807372212, 0.7730104327, 0.7651672363, 0.7572088242, 0.7491363883, 0.7409511209, 0.7326542735, 0.724247098, 0.7157308459, 0.7071067691, 0.6983762383, 0.689540565, 0.6806010008, 0.6715589762, 0.6624158025, 0.6531728506, 0.6438315511, 0.6343932748, 0.6248595119, 0.6152315736, 0.6055110693, 0.5956993103, 0.5857978463, 0.5758081675, 0.5657318234, 0.5555702448, 0.5453249812, 0.534997642, 0.5245896578, 0.514102757, 0.5035383701, 0.492898196, 0.4821837842, 0.4713967443, 0.4605387151, 0.449611336, 0.438616246, 0.4275550842, 0.4164295495, 0.4052413106, 0.3939920366, 0.3826834261, 0.3713172078, 0.3598950505, 0.3484186828, 0.336889863, 0.3253102899, 0.3136817515, 0.3020059466, 0.2902846634, 0.27851969, 0.266712755, 0.2548656464, 0.2429801822, 0.2310581058, 0.2191012353, 0.2071113735, 0.1950903237, 0.1830398887, 0.1709618866, 0.1588581502, 0.1467304677, 0.1345807016, 0.1224106774, 0.1102222055, 0.09801714122, 0.08579730988, 0.07356456667, 0.061320737, 0.04906767607, 0.03680722415, 0.02454122901, 0.01227153838, 6.123234263e-17, -0.01227153838, -0.02454122901, -0.03680722415, -0.04906767607, 
-0.061320737, -0.07356456667, -0.08579730988, -0.09801714122, -0.1102222055, -0.1224106774, -0.1345807016, -0.1467304677, -0.1588581502, -0.1709618866, -0.1830398887, -0.1950903237, -0.2071113735, -0.2191012353, -0.2310581058, -0.2429801822, -0.2548656464, -0.266712755, -0.27851969, -0.2902846634, -0.3020059466, -0.3136817515, -0.3253102899, -0.336889863, -0.3484186828, -0.3598950505, -0.3713172078, -0.3826834261, -0.3939920366, -0.4052413106, -0.4164295495, -0.4275550842, -0.438616246, -0.449611336, -0.4605387151, -0.4713967443, -0.4821837842, -0.492898196, -0.5035383701, -0.514102757, -0.5245896578, -0.534997642, -0.5453249812, -0.5555702448, -0.5657318234, -0.5758081675, -0.5857978463, -0.5956993103, -0.6055110693, -0.6152315736, -0.6248595119, -0.6343932748, -0.6438315511, -0.6531728506, -0.6624158025, -0.6715589762, -0.6806010008, -0.689540565, -0.6983762383, -0.7071067691, -0.7157308459, -0.724247098, -0.7326542735, -0.7409511209, -0.7491363883, -0.7572088242, -0.7651672363, -0.7730104327, -0.7807372212, -0.7883464098, -0.7958369255, -0.8032075167, -0.81045717, -0.8175848126, -0.8245893121, -0.8314695954, -0.838224709, -0.84485358, -0.851355195, -0.8577286005, -0.8639728427, -0.8700869679, -0.8760700822, -0.8819212914, -0.8876396418, -0.893224299, -0.8986744881, -0.903989315, -0.909168005, -0.9142097831, -0.9191138744, -0.9238795042, -0.9285060763, -0.932992816, -0.9373390079, -0.9415440559, -0.9456073046, -0.9495281577, -0.9533060193, -0.9569403529, -0.9604305029, -0.963776052, -0.9669764638, -0.9700312614, -0.9729399681, -0.975702107, -0.97831738, -0.9807852507, -0.9831054807, -0.9852776527, -0.9873014092, -0.9891765118, -0.9909026623, -0.9924795628, -0.9939069748, -0.9951847196, -0.9963126183, -0.9972904325, -0.9981181026, -0.9987954497, -0.9993223548, -0.9996988177, -0.9999247193}; -constant float tc13[512] = {1, 0.9999247193, 0.9996988177, 0.9993223548, 0.9987954497, 0.9981181026, 0.9972904325, 0.9963126183, 0.9951847196, 0.9939069748, 0.9924795628, 
0.9909026623, 0.9891765118, 0.9873014092, 0.9852776527, 0.9831054807, 0.9807852507, 0.97831738, 0.975702107, 0.9729399681, 0.9700312614, 0.9669764638, 0.963776052, 0.9604305029, 0.9569403529, 0.9533060193, 0.9495281577, 0.9456073046, 0.9415440559, 0.9373390079, 0.932992816, 0.9285060763, 0.9238795042, 0.9191138744, 0.9142097831, 0.909168005, 0.903989315, 0.8986744881, 0.893224299, 0.8876396418, 0.8819212914, 0.8760700822, 0.8700869679, 0.8639728427, 0.8577286005, 0.851355195, 0.84485358, 0.838224709, 0.8314695954, 0.8245893121, 0.8175848126, 0.81045717, 0.8032075167, 0.7958369255, 0.7883464098, 0.7807372212, 0.7730104327, 0.7651672363, 0.7572088242, 0.7491363883, 0.7409511209, 0.7326542735, 0.724247098, 0.7157308459, 0.7071067691, 0.6983762383, 0.689540565, 0.6806010008, 0.6715589762, 0.6624158025, 0.6531728506, 0.6438315511, 0.6343932748, 0.6248595119, 0.6152315736, 0.6055110693, 0.5956993103, 0.5857978463, 0.5758081675, 0.5657318234, 0.5555702448, 0.5453249812, 0.534997642, 0.5245896578, 0.514102757, 0.5035383701, 0.492898196, 0.4821837842, 0.4713967443, 0.4605387151, 0.449611336, 0.438616246, 0.4275550842, 0.4164295495, 0.4052413106, 0.3939920366, 0.3826834261, 0.3713172078, 0.3598950505, 0.3484186828, 0.336889863, 0.3253102899, 0.3136817515, 0.3020059466, 0.2902846634, 0.27851969, 0.266712755, 0.2548656464, 0.2429801822, 0.2310581058, 0.2191012353, 0.2071113735, 0.1950903237, 0.1830398887, 0.1709618866, 0.1588581502, 0.1467304677, 0.1345807016, 0.1224106774, 0.1102222055, 0.09801714122, 0.08579730988, 0.07356456667, 0.061320737, 0.04906767607, 0.03680722415, 0.02454122901, 0.01227153838, 6.123234263e-17, -0.01227153838, -0.02454122901, -0.03680722415, -0.04906767607, -0.061320737, -0.07356456667, -0.08579730988, -0.09801714122, -0.1102222055, -0.1224106774, -0.1345807016, -0.1467304677, -0.1588581502, -0.1709618866, -0.1830398887, -0.1950903237, -0.2071113735, -0.2191012353, -0.2310581058, -0.2429801822, -0.2548656464, -0.266712755, -0.27851969, -0.2902846634, 
-0.3020059466, -0.3136817515, -0.3253102899, -0.336889863, -0.3484186828, -0.3598950505, -0.3713172078, -0.3826834261, -0.3939920366, -0.4052413106, -0.4164295495, -0.4275550842, -0.438616246, -0.449611336, -0.4605387151, -0.4713967443, -0.4821837842, -0.492898196, -0.5035383701, -0.514102757, -0.5245896578, -0.534997642, -0.5453249812, -0.5555702448, -0.5657318234, -0.5758081675, -0.5857978463, -0.5956993103, -0.6055110693, -0.6152315736, -0.6248595119, -0.6343932748, -0.6438315511, -0.6531728506, -0.6624158025, -0.6715589762, -0.6806010008, -0.689540565, -0.6983762383, -0.7071067691, -0.7157308459, -0.724247098, -0.7326542735, -0.7409511209, -0.7491363883, -0.7572088242, -0.7651672363, -0.7730104327, -0.7807372212, -0.7883464098, -0.7958369255, -0.8032075167, -0.81045717, -0.8175848126, -0.8245893121, -0.8314695954, -0.838224709, -0.84485358, -0.851355195, -0.8577286005, -0.8639728427, -0.8700869679, -0.8760700822, -0.8819212914, -0.8876396418, -0.893224299, -0.8986744881, -0.903989315, -0.909168005, -0.9142097831, -0.9191138744, -0.9238795042, -0.9285060763, -0.932992816, -0.9373390079, -0.9415440559, -0.9456073046, -0.9495281577, -0.9533060193, -0.9569403529, -0.9604305029, -0.963776052, -0.9669764638, -0.9700312614, -0.9729399681, -0.975702107, -0.97831738, -0.9807852507, -0.9831054807, -0.9852776527, -0.9873014092, -0.9891765118, -0.9909026623, -0.9924795628, -0.9939069748, -0.9951847196, -0.9963126183, -0.9972904325, -0.9981181026, -0.9987954497, -0.9993223548, -0.9996988177, -0.9999247193, 1, 0.9999247193, 0.9996988177, 0.9993223548, 0.9987954497, 0.9981181026, 0.9972904325, 0.9963126183, 0.9951847196, 0.9939069748, 0.9924795628, 0.9909026623, 0.9891765118, 0.9873014092, 0.9852776527, 0.9831054807, 0.9807852507, 0.97831738, 0.975702107, 0.9729399681, 0.9700312614, 0.9669764638, 0.963776052, 0.9604305029, 0.9569403529, 0.9533060193, 0.9495281577, 0.9456073046, 0.9415440559, 0.9373390079, 0.932992816, 0.9285060763, 0.9238795042, 0.9191138744, 0.9142097831, 
0.909168005, 0.903989315, 0.8986744881, 0.893224299, 0.8876396418, 0.8819212914, 0.8760700822, 0.8700869679, 0.8639728427, 0.8577286005, 0.851355195, 0.84485358, 0.838224709, 0.8314695954, 0.8245893121, 0.8175848126, 0.81045717, 0.8032075167, 0.7958369255, 0.7883464098, 0.7807372212, 0.7730104327, 0.7651672363, 0.7572088242, 0.7491363883, 0.7409511209, 0.7326542735, 0.724247098, 0.7157308459, 0.7071067691, 0.6983762383, 0.689540565, 0.6806010008, 0.6715589762, 0.6624158025, 0.6531728506, 0.6438315511, 0.6343932748, 0.6248595119, 0.6152315736, 0.6055110693, 0.5956993103, 0.5857978463, 0.5758081675, 0.5657318234, 0.5555702448, 0.5453249812, 0.534997642, 0.5245896578, 0.514102757, 0.5035383701, 0.492898196, 0.4821837842, 0.4713967443, 0.4605387151, 0.449611336, 0.438616246, 0.4275550842, 0.4164295495, 0.4052413106, 0.3939920366, 0.3826834261, 0.3713172078, 0.3598950505, 0.3484186828, 0.336889863, 0.3253102899, 0.3136817515, 0.3020059466, 0.2902846634, 0.27851969, 0.266712755, 0.2548656464, 0.2429801822, 0.2310581058, 0.2191012353, 0.2071113735, 0.1950903237, 0.1830398887, 0.1709618866, 0.1588581502, 0.1467304677, 0.1345807016, 0.1224106774, 0.1102222055, 0.09801714122, 0.08579730988, 0.07356456667, 0.061320737, 0.04906767607, 0.03680722415, 0.02454122901, 0.01227153838, 6.123234263e-17, -0.01227153838, -0.02454122901, -0.03680722415, -0.04906767607, -0.061320737, -0.07356456667, -0.08579730988, -0.09801714122, -0.1102222055, -0.1224106774, -0.1345807016, -0.1467304677, -0.1588581502, -0.1709618866, -0.1830398887, -0.1950903237, -0.2071113735, -0.2191012353, -0.2310581058, -0.2429801822, -0.2548656464, -0.266712755, -0.27851969, -0.2902846634, -0.3020059466, -0.3136817515, -0.3253102899, -0.336889863, -0.3484186828, -0.3598950505, -0.3713172078, -0.3826834261, -0.3939920366, -0.4052413106, -0.4164295495, -0.4275550842, -0.438616246, -0.449611336, -0.4605387151, -0.4713967443, -0.4821837842, -0.492898196, -0.5035383701, -0.514102757, -0.5245896578, -0.534997642, 
-0.5453249812, -0.5555702448, -0.5657318234, -0.5758081675, -0.5857978463, -0.5956993103, -0.6055110693, -0.6152315736, -0.6248595119, -0.6343932748, -0.6438315511, -0.6531728506, -0.6624158025, -0.6715589762, -0.6806010008, -0.689540565, -0.6983762383, -0.7071067691, -0.7157308459, -0.724247098, -0.7326542735, -0.7409511209, -0.7491363883, -0.7572088242, -0.7651672363, -0.7730104327, -0.7807372212, -0.7883464098, -0.7958369255, -0.8032075167, -0.81045717, -0.8175848126, -0.8245893121, -0.8314695954, -0.838224709, -0.84485358, -0.851355195, -0.8577286005, -0.8639728427, -0.8700869679, -0.8760700822, -0.8819212914, -0.8876396418, -0.893224299, -0.8986744881, -0.903989315, -0.909168005, -0.9142097831, -0.9191138744, -0.9238795042, -0.9285060763, -0.932992816, -0.9373390079, -0.9415440559, -0.9456073046, -0.9495281577, -0.9533060193, -0.9569403529, -0.9604305029, -0.963776052, -0.9669764638, -0.9700312614, -0.9729399681, -0.975702107, -0.97831738, -0.9807852507, -0.9831054807, -0.9852776527, -0.9873014092, -0.9891765118, -0.9909026623, -0.9924795628, -0.9939069748, -0.9951847196, -0.9963126183, -0.9972904325, -0.9981181026, -0.9987954497, -0.9993223548, -0.9996988177, -0.9999247193}; -constant float tc11[512] = {1, 0.9999811649, 0.9999247193, 0.9998306036, 0.9996988177, 0.9995294213, 0.9993223548, 0.9990777373, 0.9987954497, 0.9984755516, 0.9981181026, 0.997723043, 0.9972904325, 0.996820271, 0.9963126183, 0.9957674146, 0.9951847196, 0.9945645928, 0.9939069748, 0.993211925, 0.9924795628, 0.9917097688, 0.9909026623, 0.9900581837, 0.9891765118, 0.988257587, 0.9873014092, 0.9863080978, 0.9852776527, 0.9842100739, 0.9831054807, 0.9819638729, 0.9807852507, 0.9795697927, 0.97831738, 0.9770281315, 0.975702107, 0.974339366, 0.9729399681, 0.9715039134, 0.9700312614, 0.9685220718, 0.9669764638, 0.9653944373, 0.963776052, 0.9621214271, 0.9604305029, 0.9587034583, 0.9569403529, 0.9551411867, 0.9533060193, 0.9514350295, 0.9495281577, 0.9475855827, 0.9456073046, 0.9435934424, 
0.9415440559, 0.9394592047, 0.9373390079, 0.9351835251, 0.932992816, 0.9307669401, 0.9285060763, 0.9262102246, 0.9238795042, 0.9215140343, 0.9191138744, 0.9166790843, 0.9142097831, 0.9117060304, 0.909168005, 0.9065957069, 0.903989315, 0.9013488293, 0.8986744881, 0.8959662318, 0.893224299, 0.8904487491, 0.8876396418, 0.8847970963, 0.8819212914, 0.8790122271, 0.8760700822, 0.8730949759, 0.8700869679, 0.867046237, 0.8639728427, 0.8608669639, 0.8577286005, 0.854557991, 0.851355195, 0.8481203318, 0.84485358, 0.8415549994, 0.838224709, 0.8348628879, 0.8314695954, 0.8280450702, 0.8245893121, 0.8211025, 0.8175848126, 0.8140363097, 0.81045717, 0.8068475723, 0.8032075167, 0.7995372415, 0.7958369255, 0.7921065688, 0.7883464098, 0.7845565677, 0.7807372212, 0.7768884897, 0.7730104327, 0.7691033483, 0.7651672363, 0.761202395, 0.7572088242, 0.7531868219, 0.7491363883, 0.7450577617, 0.7409511209, 0.7368165851, 0.7326542735, 0.728464365, 0.724247098, 0.720002532, 0.7157308459, 0.7114322186, 0.7071067691, 0.7027547359, 0.6983762383, 0.6939714551, 0.689540565, 0.6850836873, 0.6806010008, 0.6760926843, 0.6715589762, 0.6669999361, 0.6624158025, 0.6578066945, 0.6531728506, 0.64851439, 0.6438315511, 0.6391244531, 0.6343932748, 0.6296382546, 0.6248595119, 0.6200572252, 0.6152315736, 0.6103827953, 0.6055110693, 0.6006164551, 0.5956993103, 0.5907596946, 0.5857978463, 0.5808139443, 0.5758081675, 0.5707807541, 0.5657318234, 0.5606615543, 0.5555702448, 0.5504579544, 0.5453249812, 0.5401714444, 0.534997642, 0.5298036337, 0.5245896578, 0.5193560123, 0.514102757, 0.5088301301, 0.5035383701, 0.4982276559, 0.492898196, 0.4875501692, 0.4821837842, 0.4767992198, 0.4713967443, 0.4659765065, 0.4605387151, 0.4550835788, 0.449611336, 0.4441221356, 0.438616246, 0.433093816, 0.4275550842, 0.4220002592, 0.4164295495, 0.4108431637, 0.4052413106, 0.3996241987, 0.3939920366, 0.3883450329, 0.3826834261, 0.3770074248, 0.3713172078, 0.3656129837, 0.3598950505, 0.3541635275, 0.3484186828, 0.3426607251, 
0.336889863, 0.3311063051, 0.3253102899, 0.3195020258, 0.3136817515, 0.3078496456, 0.3020059466, 0.296150893, 0.2902846634, 0.2844075263, 0.27851969, 0.2726213634, 0.266712755, 0.2607941031, 0.2548656464, 0.2489276081, 0.2429801822, 0.2370236069, 0.2310581058, 0.2250839174, 0.2191012353, 0.2131103128, 0.2071113735, 0.201104641, 0.1950903237, 0.1890686601, 0.1830398887, 0.1770042181, 0.1709618866, 0.1649131179, 0.1588581502, 0.1527971923, 0.1467304677, 0.1406582445, 0.1345807016, 0.1284981072, 0.1224106774, 0.1163186282, 0.1102222055, 0.1041216329, 0.09801714122, 0.09190895408, 0.08579730988, 0.07968243957, 0.07356456667, 0.06744392216, 0.061320737, 0.05519524589, 0.04906767607, 0.0429382585, 0.03680722415, 0.030674804, 0.02454122901, 0.01840673015, 0.01227153838, 0.006135884672, 1, 0.9999811649, 0.9999247193, 0.9998306036, 0.9996988177, 0.9995294213, 0.9993223548, 0.9990777373, 0.9987954497, 0.9984755516, 0.9981181026, 0.997723043, 0.9972904325, 0.996820271, 0.9963126183, 0.9957674146, 0.9951847196, 0.9945645928, 0.9939069748, 0.993211925, 0.9924795628, 0.9917097688, 0.9909026623, 0.9900581837, 0.9891765118, 0.988257587, 0.9873014092, 0.9863080978, 0.9852776527, 0.9842100739, 0.9831054807, 0.9819638729, 0.9807852507, 0.9795697927, 0.97831738, 0.9770281315, 0.975702107, 0.974339366, 0.9729399681, 0.9715039134, 0.9700312614, 0.9685220718, 0.9669764638, 0.9653944373, 0.963776052, 0.9621214271, 0.9604305029, 0.9587034583, 0.9569403529, 0.9551411867, 0.9533060193, 0.9514350295, 0.9495281577, 0.9475855827, 0.9456073046, 0.9435934424, 0.9415440559, 0.9394592047, 0.9373390079, 0.9351835251, 0.932992816, 0.9307669401, 0.9285060763, 0.9262102246, 0.9238795042, 0.9215140343, 0.9191138744, 0.9166790843, 0.9142097831, 0.9117060304, 0.909168005, 0.9065957069, 0.903989315, 0.9013488293, 0.8986744881, 0.8959662318, 0.893224299, 0.8904487491, 0.8876396418, 0.8847970963, 0.8819212914, 0.8790122271, 0.8760700822, 0.8730949759, 0.8700869679, 0.867046237, 0.8639728427, 0.8608669639, 
0.8577286005, 0.854557991, 0.851355195, 0.8481203318, 0.84485358, 0.8415549994, 0.838224709, 0.8348628879, 0.8314695954, 0.8280450702, 0.8245893121, 0.8211025, 0.8175848126, 0.8140363097, 0.81045717, 0.8068475723, 0.8032075167, 0.7995372415, 0.7958369255, 0.7921065688, 0.7883464098, 0.7845565677, 0.7807372212, 0.7768884897, 0.7730104327, 0.7691033483, 0.7651672363, 0.761202395, 0.7572088242, 0.7531868219, 0.7491363883, 0.7450577617, 0.7409511209, 0.7368165851, 0.7326542735, 0.728464365, 0.724247098, 0.720002532, 0.7157308459, 0.7114322186, 0.7071067691, 0.7027547359, 0.6983762383, 0.6939714551, 0.689540565, 0.6850836873, 0.6806010008, 0.6760926843, 0.6715589762, 0.6669999361, 0.6624158025, 0.6578066945, 0.6531728506, 0.64851439, 0.6438315511, 0.6391244531, 0.6343932748, 0.6296382546, 0.6248595119, 0.6200572252, 0.6152315736, 0.6103827953, 0.6055110693, 0.6006164551, 0.5956993103, 0.5907596946, 0.5857978463, 0.5808139443, 0.5758081675, 0.5707807541, 0.5657318234, 0.5606615543, 0.5555702448, 0.5504579544, 0.5453249812, 0.5401714444, 0.534997642, 0.5298036337, 0.5245896578, 0.5193560123, 0.514102757, 0.5088301301, 0.5035383701, 0.4982276559, 0.492898196, 0.4875501692, 0.4821837842, 0.4767992198, 0.4713967443, 0.4659765065, 0.4605387151, 0.4550835788, 0.449611336, 0.4441221356, 0.438616246, 0.433093816, 0.4275550842, 0.4220002592, 0.4164295495, 0.4108431637, 0.4052413106, 0.3996241987, 0.3939920366, 0.3883450329, 0.3826834261, 0.3770074248, 0.3713172078, 0.3656129837, 0.3598950505, 0.3541635275, 0.3484186828, 0.3426607251, 0.336889863, 0.3311063051, 0.3253102899, 0.3195020258, 0.3136817515, 0.3078496456, 0.3020059466, 0.296150893, 0.2902846634, 0.2844075263, 0.27851969, 0.2726213634, 0.266712755, 0.2607941031, 0.2548656464, 0.2489276081, 0.2429801822, 0.2370236069, 0.2310581058, 0.2250839174, 0.2191012353, 0.2131103128, 0.2071113735, 0.201104641, 0.1950903237, 0.1890686601, 0.1830398887, 0.1770042181, 0.1709618866, 0.1649131179, 0.1588581502, 0.1527971923, 
0.1467304677, 0.1406582445, 0.1345807016, 0.1284981072, 0.1224106774, 0.1163186282, 0.1102222055, 0.1041216329, 0.09801714122, 0.09190895408, 0.08579730988, 0.07968243957, 0.07356456667, 0.06744392216, 0.061320737, 0.05519524589, 0.04906767607, 0.0429382585, 0.03680722415, 0.030674804, 0.02454122901, 0.01840673015, 0.01227153838, 0.006135884672}; -constant float tc14[512] = {1, 0.9999811649, 0.9999247193, 0.9998306036, 0.9996988177, 0.9995294213, 0.9993223548, 0.9990777373, 0.9987954497, 0.9984755516, 0.9981181026, 0.997723043, 0.9972904325, 0.996820271, 0.9963126183, 0.9957674146, 0.9951847196, 0.9945645928, 0.9939069748, 0.993211925, 0.9924795628, 0.9917097688, 0.9909026623, 0.9900581837, 0.9891765118, 0.988257587, 0.9873014092, 0.9863080978, 0.9852776527, 0.9842100739, 0.9831054807, 0.9819638729, 0.9807852507, 0.9795697927, 0.97831738, 0.9770281315, 0.975702107, 0.974339366, 0.9729399681, 0.9715039134, 0.9700312614, 0.9685220718, 0.9669764638, 0.9653944373, 0.963776052, 0.9621214271, 0.9604305029, 0.9587034583, 0.9569403529, 0.9551411867, 0.9533060193, 0.9514350295, 0.9495281577, 0.9475855827, 0.9456073046, 0.9435934424, 0.9415440559, 0.9394592047, 0.9373390079, 0.9351835251, 0.932992816, 0.9307669401, 0.9285060763, 0.9262102246, 0.9238795042, 0.9215140343, 0.9191138744, 0.9166790843, 0.9142097831, 0.9117060304, 0.909168005, 0.9065957069, 0.903989315, 0.9013488293, 0.8986744881, 0.8959662318, 0.893224299, 0.8904487491, 0.8876396418, 0.8847970963, 0.8819212914, 0.8790122271, 0.8760700822, 0.8730949759, 0.8700869679, 0.867046237, 0.8639728427, 0.8608669639, 0.8577286005, 0.854557991, 0.851355195, 0.8481203318, 0.84485358, 0.8415549994, 0.838224709, 0.8348628879, 0.8314695954, 0.8280450702, 0.8245893121, 0.8211025, 0.8175848126, 0.8140363097, 0.81045717, 0.8068475723, 0.8032075167, 0.7995372415, 0.7958369255, 0.7921065688, 0.7883464098, 0.7845565677, 0.7807372212, 0.7768884897, 0.7730104327, 0.7691033483, 0.7651672363, 0.761202395, 0.7572088242, 0.7531868219, 
0.7491363883, 0.7450577617, 0.7409511209, 0.7368165851, 0.7326542735, 0.728464365, 0.724247098, 0.720002532, 0.7157308459, 0.7114322186, 0.7071067691, 0.7027547359, 0.6983762383, 0.6939714551, 0.689540565, 0.6850836873, 0.6806010008, 0.6760926843, 0.6715589762, 0.6669999361, 0.6624158025, 0.6578066945, 0.6531728506, 0.64851439, 0.6438315511, 0.6391244531, 0.6343932748, 0.6296382546, 0.6248595119, 0.6200572252, 0.6152315736, 0.6103827953, 0.6055110693, 0.6006164551, 0.5956993103, 0.5907596946, 0.5857978463, 0.5808139443, 0.5758081675, 0.5707807541, 0.5657318234, 0.5606615543, 0.5555702448, 0.5504579544, 0.5453249812, 0.5401714444, 0.534997642, 0.5298036337, 0.5245896578, 0.5193560123, 0.514102757, 0.5088301301, 0.5035383701, 0.4982276559, 0.492898196, 0.4875501692, 0.4821837842, 0.4767992198, 0.4713967443, 0.4659765065, 0.4605387151, 0.4550835788, 0.449611336, 0.4441221356, 0.438616246, 0.433093816, 0.4275550842, 0.4220002592, 0.4164295495, 0.4108431637, 0.4052413106, 0.3996241987, 0.3939920366, 0.3883450329, 0.3826834261, 0.3770074248, 0.3713172078, 0.3656129837, 0.3598950505, 0.3541635275, 0.3484186828, 0.3426607251, 0.336889863, 0.3311063051, 0.3253102899, 0.3195020258, 0.3136817515, 0.3078496456, 0.3020059466, 0.296150893, 0.2902846634, 0.2844075263, 0.27851969, 0.2726213634, 0.266712755, 0.2607941031, 0.2548656464, 0.2489276081, 0.2429801822, 0.2370236069, 0.2310581058, 0.2250839174, 0.2191012353, 0.2131103128, 0.2071113735, 0.201104641, 0.1950903237, 0.1890686601, 0.1830398887, 0.1770042181, 0.1709618866, 0.1649131179, 0.1588581502, 0.1527971923, 0.1467304677, 0.1406582445, 0.1345807016, 0.1284981072, 0.1224106774, 0.1163186282, 0.1102222055, 0.1041216329, 0.09801714122, 0.09190895408, 0.08579730988, 0.07968243957, 0.07356456667, 0.06744392216, 0.061320737, 0.05519524589, 0.04906767607, 0.0429382585, 0.03680722415, 0.030674804, 0.02454122901, 0.01840673015, 0.01227153838, 0.006135884672, 1, 0.9999811649, 0.9999247193, 0.9998306036, 0.9996988177, 0.9995294213, 
0.9993223548, 0.9990777373, 0.9987954497, 0.9984755516, 0.9981181026, 0.997723043, 0.9972904325, 0.996820271, 0.9963126183, 0.9957674146, 0.9951847196, 0.9945645928, 0.9939069748, 0.993211925, 0.9924795628, 0.9917097688, 0.9909026623, 0.9900581837, 0.9891765118, 0.988257587, 0.9873014092, 0.9863080978, 0.9852776527, 0.9842100739, 0.9831054807, 0.9819638729, 0.9807852507, 0.9795697927, 0.97831738, 0.9770281315, 0.975702107, 0.974339366, 0.9729399681, 0.9715039134, 0.9700312614, 0.9685220718, 0.9669764638, 0.9653944373, 0.963776052, 0.9621214271, 0.9604305029, 0.9587034583, 0.9569403529, 0.9551411867, 0.9533060193, 0.9514350295, 0.9495281577, 0.9475855827, 0.9456073046, 0.9435934424, 0.9415440559, 0.9394592047, 0.9373390079, 0.9351835251, 0.932992816, 0.9307669401, 0.9285060763, 0.9262102246, 0.9238795042, 0.9215140343, 0.9191138744, 0.9166790843, 0.9142097831, 0.9117060304, 0.909168005, 0.9065957069, 0.903989315, 0.9013488293, 0.8986744881, 0.8959662318, 0.893224299, 0.8904487491, 0.8876396418, 0.8847970963, 0.8819212914, 0.8790122271, 0.8760700822, 0.8730949759, 0.8700869679, 0.867046237, 0.8639728427, 0.8608669639, 0.8577286005, 0.854557991, 0.851355195, 0.8481203318, 0.84485358, 0.8415549994, 0.838224709, 0.8348628879, 0.8314695954, 0.8280450702, 0.8245893121, 0.8211025, 0.8175848126, 0.8140363097, 0.81045717, 0.8068475723, 0.8032075167, 0.7995372415, 0.7958369255, 0.7921065688, 0.7883464098, 0.7845565677, 0.7807372212, 0.7768884897, 0.7730104327, 0.7691033483, 0.7651672363, 0.761202395, 0.7572088242, 0.7531868219, 0.7491363883, 0.7450577617, 0.7409511209, 0.7368165851, 0.7326542735, 0.728464365, 0.724247098, 0.720002532, 0.7157308459, 0.7114322186, 0.7071067691, 0.7027547359, 0.6983762383, 0.6939714551, 0.689540565, 0.6850836873, 0.6806010008, 0.6760926843, 0.6715589762, 0.6669999361, 0.6624158025, 0.6578066945, 0.6531728506, 0.64851439, 0.6438315511, 0.6391244531, 0.6343932748, 0.6296382546, 0.6248595119, 0.6200572252, 0.6152315736, 0.6103827953, 0.6055110693, 
0.6006164551, 0.5956993103, 0.5907596946, 0.5857978463, 0.5808139443, 0.5758081675, 0.5707807541, 0.5657318234, 0.5606615543, 0.5555702448, 0.5504579544, 0.5453249812, 0.5401714444, 0.534997642, 0.5298036337, 0.5245896578, 0.5193560123, 0.514102757, 0.5088301301, 0.5035383701, 0.4982276559, 0.492898196, 0.4875501692, 0.4821837842, 0.4767992198, 0.4713967443, 0.4659765065, 0.4605387151, 0.4550835788, 0.449611336, 0.4441221356, 0.438616246, 0.433093816, 0.4275550842, 0.4220002592, 0.4164295495, 0.4108431637, 0.4052413106, 0.3996241987, 0.3939920366, 0.3883450329, 0.3826834261, 0.3770074248, 0.3713172078, 0.3656129837, 0.3598950505, 0.3541635275, 0.3484186828, 0.3426607251, 0.336889863, 0.3311063051, 0.3253102899, 0.3195020258, 0.3136817515, 0.3078496456, 0.3020059466, 0.296150893, 0.2902846634, 0.2844075263, 0.27851969, 0.2726213634, 0.266712755, 0.2607941031, 0.2548656464, 0.2489276081, 0.2429801822, 0.2370236069, 0.2310581058, 0.2250839174, 0.2191012353, 0.2131103128, 0.2071113735, 0.201104641, 0.1950903237, 0.1890686601, 0.1830398887, 0.1770042181, 0.1709618866, 0.1649131179, 0.1588581502, 0.1527971923, 0.1467304677, 0.1406582445, 0.1345807016, 0.1284981072, 0.1224106774, 0.1163186282, 0.1102222055, 0.1041216329, 0.09801714122, 0.09190895408, 0.08579730988, 0.07968243957, 0.07356456667, 0.06744392216, 0.061320737, 0.05519524589, 0.04906767607, 0.0429382585, 0.03680722415, 0.030674804, 0.02454122901, 0.01840673015, 0.01227153838, 0.006135884672}; -constant float tc12[512] = {1, 0.9998306036, 0.9993223548, 0.9984755516, 0.9972904325, 0.9957674146, 0.9939069748, 0.9917097688, 0.9891765118, 0.9863080978, 0.9831054807, 0.9795697927, 0.975702107, 0.9715039134, 0.9669764638, 0.9621214271, 0.9569403529, 0.9514350295, 0.9456073046, 0.9394592047, 0.932992816, 0.9262102246, 0.9191138744, 0.9117060304, 0.903989315, 0.8959662318, 0.8876396418, 0.8790122271, 0.8700869679, 0.8608669639, 0.851355195, 0.8415549994, 0.8314695954, 0.8211025, 0.81045717, 0.7995372415, 0.7883464098, 
0.7768884897, 0.7651672363, 0.7531868219, 0.7409511209, 0.728464365, 0.7157308459, 0.7027547359, 0.689540565, 0.6760926843, 0.6624158025, 0.64851439, 0.6343932748, 0.6200572252, 0.6055110693, 0.5907596946, 0.5758081675, 0.5606615543, 0.5453249812, 0.5298036337, 0.514102757, 0.4982276559, 0.4821837842, 0.4659765065, 0.449611336, 0.433093816, 0.4164295495, 0.3996241987, 0.3826834261, 0.3656129837, 0.3484186828, 0.3311063051, 0.3136817515, 0.296150893, 0.27851969, 0.2607941031, 0.2429801822, 0.2250839174, 0.2071113735, 0.1890686601, 0.1709618866, 0.1527971923, 0.1345807016, 0.1163186282, 0.09801714122, 0.07968243957, 0.061320737, 0.0429382585, 0.02454122901, 0.006135884672, -0.01227153838, -0.030674804, -0.04906767607, -0.06744392216, -0.08579730988, -0.1041216329, -0.1224106774, -0.1406582445, -0.1588581502, -0.1770042181, -0.1950903237, -0.2131103128, -0.2310581058, -0.2489276081, -0.266712755, -0.2844075263, -0.3020059466, -0.3195020258, -0.336889863, -0.3541635275, -0.3713172078, -0.3883450329, -0.4052413106, -0.4220002592, -0.438616246, -0.4550835788, -0.4713967443, -0.4875501692, -0.5035383701, -0.5193560123, -0.534997642, -0.5504579544, -0.5657318234, -0.5808139443, -0.5956993103, -0.6103827953, -0.6248595119, -0.6391244531, -0.6531728506, -0.6669999361, -0.6806010008, -0.6939714551, -0.7071067691, -0.720002532, -0.7326542735, -0.7450577617, -0.7572088242, -0.7691033483, -0.7807372212, -0.7921065688, -0.8032075167, -0.8140363097, -0.8245893121, -0.8348628879, -0.84485358, -0.854557991, -0.8639728427, -0.8730949759, -0.8819212914, -0.8904487491, -0.8986744881, -0.9065957069, -0.9142097831, -0.9215140343, -0.9285060763, -0.9351835251, -0.9415440559, -0.9475855827, -0.9533060193, -0.9587034583, -0.963776052, -0.9685220718, -0.9729399681, -0.9770281315, -0.9807852507, -0.9842100739, -0.9873014092, -0.9900581837, -0.9924795628, -0.9945645928, -0.9963126183, -0.997723043, -0.9987954497, -0.9995294213, -0.9999247193, -0.9999811649, -0.9996988177, -0.9990777373, 
-0.9981181026, -0.996820271, -0.9951847196, -0.993211925, -0.9909026623, -0.988257587, -0.9852776527, -0.9819638729, -0.97831738, -0.974339366, -0.9700312614, -0.9653944373, -0.9604305029, -0.9551411867, -0.9495281577, -0.9435934424, -0.9373390079, -0.9307669401, -0.9238795042, -0.9166790843, -0.909168005, -0.9013488293, -0.893224299, -0.8847970963, -0.8760700822, -0.867046237, -0.8577286005, -0.8481203318, -0.838224709, -0.8280450702, -0.8175848126, -0.8068475723, -0.7958369255, -0.7845565677, -0.7730104327, -0.761202395, -0.7491363883, -0.7368165851, -0.724247098, -0.7114322186, -0.6983762383, -0.6850836873, -0.6715589762, -0.6578066945, -0.6438315511, -0.6296382546, -0.6152315736, -0.6006164551, -0.5857978463, -0.5707807541, -0.5555702448, -0.5401714444, -0.5245896578, -0.5088301301, -0.492898196, -0.4767992198, -0.4605387151, -0.4441221356, -0.4275550842, -0.4108431637, -0.3939920366, -0.3770074248, -0.3598950505, -0.3426607251, -0.3253102899, -0.3078496456, -0.2902846634, -0.2726213634, -0.2548656464, -0.2370236069, -0.2191012353, -0.201104641, -0.1830398887, -0.1649131179, -0.1467304677, -0.1284981072, -0.1102222055, -0.09190895408, -0.07356456667, -0.05519524589, -0.03680722415, -0.01840673015, 1, 0.9998306036, 0.9993223548, 0.9984755516, 0.9972904325, 0.9957674146, 0.9939069748, 0.9917097688, 0.9891765118, 0.9863080978, 0.9831054807, 0.9795697927, 0.975702107, 0.9715039134, 0.9669764638, 0.9621214271, 0.9569403529, 0.9514350295, 0.9456073046, 0.9394592047, 0.932992816, 0.9262102246, 0.9191138744, 0.9117060304, 0.903989315, 0.8959662318, 0.8876396418, 0.8790122271, 0.8700869679, 0.8608669639, 0.851355195, 0.8415549994, 0.8314695954, 0.8211025, 0.81045717, 0.7995372415, 0.7883464098, 0.7768884897, 0.7651672363, 0.7531868219, 0.7409511209, 0.728464365, 0.7157308459, 0.7027547359, 0.689540565, 0.6760926843, 0.6624158025, 0.64851439, 0.6343932748, 0.6200572252, 0.6055110693, 0.5907596946, 0.5758081675, 0.5606615543, 0.5453249812, 0.5298036337, 0.514102757, 
0.4982276559, 0.4821837842, 0.4659765065, 0.449611336, 0.433093816, 0.4164295495, 0.3996241987, 0.3826834261, 0.3656129837, 0.3484186828, 0.3311063051, 0.3136817515, 0.296150893, 0.27851969, 0.2607941031, 0.2429801822, 0.2250839174, 0.2071113735, 0.1890686601, 0.1709618866, 0.1527971923, 0.1345807016, 0.1163186282, 0.09801714122, 0.07968243957, 0.061320737, 0.0429382585, 0.02454122901, 0.006135884672, -0.01227153838, -0.030674804, -0.04906767607, -0.06744392216, -0.08579730988, -0.1041216329, -0.1224106774, -0.1406582445, -0.1588581502, -0.1770042181, -0.1950903237, -0.2131103128, -0.2310581058, -0.2489276081, -0.266712755, -0.2844075263, -0.3020059466, -0.3195020258, -0.336889863, -0.3541635275, -0.3713172078, -0.3883450329, -0.4052413106, -0.4220002592, -0.438616246, -0.4550835788, -0.4713967443, -0.4875501692, -0.5035383701, -0.5193560123, -0.534997642, -0.5504579544, -0.5657318234, -0.5808139443, -0.5956993103, -0.6103827953, -0.6248595119, -0.6391244531, -0.6531728506, -0.6669999361, -0.6806010008, -0.6939714551, -0.7071067691, -0.720002532, -0.7326542735, -0.7450577617, -0.7572088242, -0.7691033483, -0.7807372212, -0.7921065688, -0.8032075167, -0.8140363097, -0.8245893121, -0.8348628879, -0.84485358, -0.854557991, -0.8639728427, -0.8730949759, -0.8819212914, -0.8904487491, -0.8986744881, -0.9065957069, -0.9142097831, -0.9215140343, -0.9285060763, -0.9351835251, -0.9415440559, -0.9475855827, -0.9533060193, -0.9587034583, -0.963776052, -0.9685220718, -0.9729399681, -0.9770281315, -0.9807852507, -0.9842100739, -0.9873014092, -0.9900581837, -0.9924795628, -0.9945645928, -0.9963126183, -0.997723043, -0.9987954497, -0.9995294213, -0.9999247193, -0.9999811649, -0.9996988177, -0.9990777373, -0.9981181026, -0.996820271, -0.9951847196, -0.993211925, -0.9909026623, -0.988257587, -0.9852776527, -0.9819638729, -0.97831738, -0.974339366, -0.9700312614, -0.9653944373, -0.9604305029, -0.9551411867, -0.9495281577, -0.9435934424, -0.9373390079, -0.9307669401, -0.9238795042, 
-0.9166790843, -0.909168005, -0.9013488293, -0.893224299, -0.8847970963, -0.8760700822, -0.867046237, -0.8577286005, -0.8481203318, -0.838224709, -0.8280450702, -0.8175848126, -0.8068475723, -0.7958369255, -0.7845565677, -0.7730104327, -0.761202395, -0.7491363883, -0.7368165851, -0.724247098, -0.7114322186, -0.6983762383, -0.6850836873, -0.6715589762, -0.6578066945, -0.6438315511, -0.6296382546, -0.6152315736, -0.6006164551, -0.5857978463, -0.5707807541, -0.5555702448, -0.5401714444, -0.5245896578, -0.5088301301, -0.492898196, -0.4767992198, -0.4605387151, -0.4441221356, -0.4275550842, -0.4108431637, -0.3939920366, -0.3770074248, -0.3598950505, -0.3426607251, -0.3253102899, -0.3078496456, -0.2902846634, -0.2726213634, -0.2548656464, -0.2370236069, -0.2191012353, -0.201104641, -0.1830398887, -0.1649131179, -0.1467304677, -0.1284981072, -0.1102222055, -0.09190895408, -0.07356456667, -0.05519524589, -0.03680722415, -0.01840673015}; -constant float tc15[512] = { 1, 0.9998306036, 0.9993223548, 0.9984755516, 0.9972904325, 0.9957674146, 0.9939069748, 0.9917097688, 0.9891765118, 0.9863080978, 0.9831054807, 0.9795697927, 0.975702107, 0.9715039134, 0.9669764638, 0.9621214271, 0.9569403529, 0.9514350295, 0.9456073046, 0.9394592047, 0.932992816, 0.9262102246, 0.9191138744, 0.9117060304, 0.903989315, 0.8959662318, 0.8876396418, 0.8790122271, 0.8700869679, 0.8608669639, 0.851355195, 0.8415549994, 0.8314695954, 0.8211025, 0.81045717, 0.7995372415, 0.7883464098, 0.7768884897, 0.7651672363, 0.7531868219, 0.7409511209, 0.728464365, 0.7157308459, 0.7027547359, 0.689540565, 0.6760926843, 0.6624158025, 0.64851439, 0.6343932748, 0.6200572252, 0.6055110693, 0.5907596946, 0.5758081675, 0.5606615543, 0.5453249812, 0.5298036337, 0.514102757, 0.4982276559, 0.4821837842, 0.4659765065, 0.449611336, 0.433093816, 0.4164295495, 0.3996241987, 0.3826834261, 0.3656129837, 0.3484186828, 0.3311063051, 0.3136817515, 0.296150893, 0.27851969, 0.2607941031, 0.2429801822, 0.2250839174, 0.2071113735, 
0.1890686601, 0.1709618866, 0.1527971923, 0.1345807016, 0.1163186282, 0.09801714122, 0.07968243957, 0.061320737, 0.0429382585, 0.02454122901, 0.006135884672, -0.01227153838, -0.030674804, -0.04906767607, -0.06744392216, -0.08579730988, -0.1041216329, -0.1224106774, -0.1406582445, -0.1588581502, -0.1770042181, -0.1950903237, -0.2131103128, -0.2310581058, -0.2489276081, -0.266712755, -0.2844075263, -0.3020059466, -0.3195020258, -0.336889863, -0.3541635275, -0.3713172078, -0.3883450329, -0.4052413106, -0.4220002592, -0.438616246, -0.4550835788, -0.4713967443, -0.4875501692, -0.5035383701, -0.5193560123, -0.534997642, -0.5504579544, -0.5657318234, -0.5808139443, -0.5956993103, -0.6103827953, -0.6248595119, -0.6391244531, -0.6531728506, -0.6669999361, -0.6806010008, -0.6939714551, -0.7071067691, -0.720002532, -0.7326542735, -0.7450577617, -0.7572088242, -0.7691033483, -0.7807372212, -0.7921065688, -0.8032075167, -0.8140363097, -0.8245893121, -0.8348628879, -0.84485358, -0.854557991, -0.8639728427, -0.8730949759, -0.8819212914, -0.8904487491, -0.8986744881, -0.9065957069, -0.9142097831, -0.9215140343, -0.9285060763, -0.9351835251, -0.9415440559, -0.9475855827, -0.9533060193, -0.9587034583, -0.963776052, -0.9685220718, -0.9729399681, -0.9770281315, -0.9807852507, -0.9842100739, -0.9873014092, -0.9900581837, -0.9924795628, -0.9945645928, -0.9963126183, -0.997723043, -0.9987954497, -0.9995294213, -0.9999247193, -0.9999811649, -0.9996988177, -0.9990777373, -0.9981181026, -0.996820271, -0.9951847196, -0.993211925, -0.9909026623, -0.988257587, -0.9852776527, -0.9819638729, -0.97831738, -0.974339366, -0.9700312614, -0.9653944373, -0.9604305029, -0.9551411867, -0.9495281577, -0.9435934424, -0.9373390079, -0.9307669401, -0.9238795042, -0.9166790843, -0.909168005, -0.9013488293, -0.893224299, -0.8847970963, -0.8760700822, -0.867046237, -0.8577286005, -0.8481203318, -0.838224709, -0.8280450702, -0.8175848126, -0.8068475723, -0.7958369255, -0.7845565677, -0.7730104327, -0.761202395, 
-0.7491363883, -0.7368165851, -0.724247098, -0.7114322186, -0.6983762383, -0.6850836873, -0.6715589762, -0.6578066945, -0.6438315511, -0.6296382546, -0.6152315736, -0.6006164551, -0.5857978463, -0.5707807541, -0.5555702448, -0.5401714444, -0.5245896578, -0.5088301301, -0.492898196, -0.4767992198, -0.4605387151, -0.4441221356, -0.4275550842, -0.4108431637, -0.3939920366, -0.3770074248, -0.3598950505, -0.3426607251, -0.3253102899, -0.3078496456, -0.2902846634, -0.2726213634, -0.2548656464, -0.2370236069, -0.2191012353, -0.201104641, -0.1830398887, -0.1649131179, -0.1467304677, -0.1284981072, -0.1102222055, -0.09190895408, -0.07356456667, -0.05519524589, -0.03680722415, -0.01840673015, 1, 0.9998306036, 0.9993223548, 0.9984755516, 0.9972904325, 0.9957674146, 0.9939069748, 0.9917097688, 0.9891765118, 0.9863080978, 0.9831054807, 0.9795697927, 0.975702107, 0.9715039134, 0.9669764638, 0.9621214271, 0.9569403529, 0.9514350295, 0.9456073046, 0.9394592047, 0.932992816, 0.9262102246, 0.9191138744, 0.9117060304, 0.903989315, 0.8959662318, 0.8876396418, 0.8790122271, 0.8700869679, 0.8608669639, 0.851355195, 0.8415549994, 0.8314695954, 0.8211025, 0.81045717, 0.7995372415, 0.7883464098, 0.7768884897, 0.7651672363, 0.7531868219, 0.7409511209, 0.728464365, 0.7157308459, 0.7027547359, 0.689540565, 0.6760926843, 0.6624158025, 0.64851439, 0.6343932748, 0.6200572252, 0.6055110693, 0.5907596946, 0.5758081675, 0.5606615543, 0.5453249812, 0.5298036337, 0.514102757, 0.4982276559, 0.4821837842, 0.4659765065, 0.449611336, 0.433093816, 0.4164295495, 0.3996241987, 0.3826834261, 0.3656129837, 0.3484186828, 0.3311063051, 0.3136817515, 0.296150893, 0.27851969, 0.2607941031, 0.2429801822, 0.2250839174, 0.2071113735, 0.1890686601, 0.1709618866, 0.1527971923, 0.1345807016, 0.1163186282, 0.09801714122, 0.07968243957, 0.061320737, 0.0429382585, 0.02454122901, 0.006135884672, -0.01227153838, -0.030674804, -0.04906767607, -0.06744392216, -0.08579730988, -0.1041216329, -0.1224106774, -0.1406582445, 
-0.1588581502, -0.1770042181, -0.1950903237, -0.2131103128, -0.2310581058, -0.2489276081, -0.266712755, -0.2844075263, -0.3020059466, -0.3195020258, -0.336889863, -0.3541635275, -0.3713172078, -0.3883450329, -0.4052413106, -0.4220002592, -0.438616246, -0.4550835788, -0.4713967443, -0.4875501692, -0.5035383701, -0.5193560123, -0.534997642, -0.5504579544, -0.5657318234, -0.5808139443, -0.5956993103, -0.6103827953, -0.6248595119, -0.6391244531, -0.6531728506, -0.6669999361, -0.6806010008, -0.6939714551, -0.7071067691, -0.720002532, -0.7326542735, -0.7450577617, -0.7572088242, -0.7691033483, -0.7807372212, -0.7921065688, -0.8032075167, -0.8140363097, -0.8245893121, -0.8348628879, -0.84485358, -0.854557991, -0.8639728427, -0.8730949759, -0.8819212914, -0.8904487491, -0.8986744881, -0.9065957069, -0.9142097831, -0.9215140343, -0.9285060763, -0.9351835251, -0.9415440559, -0.9475855827, -0.9533060193, -0.9587034583, -0.963776052, -0.9685220718, -0.9729399681, -0.9770281315, -0.9807852507, -0.9842100739, -0.9873014092, -0.9900581837, -0.9924795628, -0.9945645928, -0.9963126183, -0.997723043, -0.9987954497, -0.9995294213, -0.9999247193, -0.9999811649, -0.9996988177, -0.9990777373, -0.9981181026, -0.996820271, -0.9951847196, -0.993211925, -0.9909026623, -0.988257587, -0.9852776527, -0.9819638729, -0.97831738, -0.974339366, -0.9700312614, -0.9653944373, -0.9604305029, -0.9551411867, -0.9495281577, -0.9435934424, -0.9373390079, -0.9307669401, -0.9238795042, -0.9166790843, -0.909168005, -0.9013488293, -0.893224299, -0.8847970963, -0.8760700822, -0.867046237, -0.8577286005, -0.8481203318, -0.838224709, -0.8280450702, -0.8175848126, -0.8068475723, -0.7958369255, -0.7845565677, -0.7730104327, -0.761202395, -0.7491363883, -0.7368165851, -0.724247098, -0.7114322186, -0.6983762383, -0.6850836873, -0.6715589762, -0.6578066945, -0.6438315511, -0.6296382546, -0.6152315736, -0.6006164551, -0.5857978463, -0.5707807541, -0.5555702448, -0.5401714444, -0.5245896578, -0.5088301301, 
-0.492898196, -0.4767992198, -0.4605387151, -0.4441221356, -0.4275550842, -0.4108431637, -0.3939920366, -0.3770074248, -0.3598950505, -0.3426607251, -0.3253102899, -0.3078496456, -0.2902846634, -0.2726213634, -0.2548656464, -0.2370236069, -0.2191012353, -0.201104641, -0.1830398887, -0.1649131179, -0.1467304677, -0.1284981072, -0.1102222055, -0.09190895408, -0.07356456667, -0.05519524589, -0.03680722415, -0.01840673015}; -constant float tc20[512] = {1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, 
-0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, 
-0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 
0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497}; -constant float tc23[512] = {1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 
0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 
0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, 
-0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, 
-0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, 1, 0.9987954497, 0.9951847196, 0.9891765118, 0.9807852507, 0.9700312614, 0.9569403529, 0.9415440559, 0.9238795042, 0.903989315, 0.8819212914, 0.8577286005, 0.8314695954, 0.8032075167, 0.7730104327, 0.7409511209, 0.7071067691, 0.6715589762, 0.6343932748, 0.5956993103, 0.5555702448, 0.514102757, 0.4713967443, 0.4275550842, 0.3826834261, 0.336889863, 0.2902846634, 0.2429801822, 0.1950903237, 0.1467304677, 0.09801714122, 0.04906767607, 6.123234263e-17, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497}; -constant float tc21[512] = {1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 
0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 
0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 
0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 
0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901}; -constant float tc24[512] = {1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 
0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 
0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 
0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901, 1, 0.9996988177, 0.9987954497, 0.9972904325, 0.9951847196, 0.9924795628, 0.9891765118, 0.9852776527, 0.9807852507, 0.975702107, 0.9700312614, 0.963776052, 0.9569403529, 0.9495281577, 0.9415440559, 0.932992816, 0.9238795042, 0.9142097831, 0.903989315, 0.893224299, 0.8819212914, 0.8700869679, 0.8577286005, 0.84485358, 0.8314695954, 0.8175848126, 0.8032075167, 0.7883464098, 0.7730104327, 0.7572088242, 0.7409511209, 0.724247098, 0.7071067691, 0.689540565, 0.6715589762, 0.6531728506, 0.6343932748, 0.6152315736, 0.5956993103, 0.5758081675, 0.5555702448, 0.534997642, 0.514102757, 0.492898196, 0.4713967443, 0.449611336, 0.4275550842, 0.4052413106, 0.3826834261, 0.3598950505, 0.336889863, 0.3136817515, 0.2902846634, 0.266712755, 0.2429801822, 0.2191012353, 0.1950903237, 0.1709618866, 0.1467304677, 0.1224106774, 0.09801714122, 0.07356456667, 0.04906767607, 0.02454122901}; -constant float tc22[512] = {1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, 
-0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, 
-0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 
0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 
0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667}; -constant float tc25[512] = {1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, 
-0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, 
-0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, 
-0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, -0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667, 1, 0.9972904325, 0.9891765118, 0.975702107, 0.9569403529, 0.932992816, 0.903989315, 0.8700869679, 0.8314695954, 0.7883464098, 0.7409511209, 0.689540565, 0.6343932748, 0.5758081675, 0.514102757, 0.449611336, 0.3826834261, 0.3136817515, 0.2429801822, 0.1709618866, 0.09801714122, 0.02454122901, -0.04906767607, -0.1224106774, -0.1950903237, -0.266712755, -0.336889863, -0.4052413106, -0.4713967443, -0.534997642, -0.5956993103, -0.6531728506, -0.7071067691, -0.7572088242, -0.8032075167, -0.84485358, -0.8819212914, -0.9142097831, -0.9415440559, -0.963776052, -0.9807852507, -0.9924795628, -0.9987954497, -0.9996988177, -0.9951847196, -0.9852776527, -0.9700312614, -0.9495281577, -0.9238795042, -0.893224299, -0.8577286005, -0.8175848126, -0.7730104327, -0.724247098, -0.6715589762, -0.6152315736, -0.5555702448, -0.492898196, -0.4275550842, -0.3598950505, 
-0.2902846634, -0.2191012353, -0.1467304677, -0.07356456667}; -constant float tc30[512] = {1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, 
-0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 
6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 
0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507}; -constant float tc33[512] = {1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 
0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 
0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 
0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 
0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, 1, 0.9807852507, 0.9238795042, 0.8314695954, 0.7071067691, 0.5555702448, 0.3826834261, 0.1950903237, 6.123234263e-17, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507}; -constant float tc31[512] = {1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 
0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 
0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 
0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122}; -constant float tc34[512] = {1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 
0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 
0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 
0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 
0.09801714122, 1, 0.9951847196, 0.9807852507, 0.9569403529, 0.9238795042, 0.8819212914, 0.8314695954, 0.7730104327, 0.7071067691, 0.6343932748, 0.5555702448, 0.4713967443, 0.3826834261, 0.2902846634, 0.1950903237, 0.09801714122}; -constant float tc32[512] = {1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, 
-0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, 
-0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, 
-0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634}; -constant float tc35[512] = {1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 
0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 
0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 
1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, 
-0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634, 1, 0.9569403529, 0.8314695954, 0.6343932748, 0.3826834261, 0.09801714122, -0.1950903237, -0.4713967443, -0.7071067691, -0.8819212914, -0.9807852507, -0.9951847196, -0.9238795042, -0.7730104327, -0.5555702448, -0.2902846634}; -constant float tc40[512] = {1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, 
-0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 
6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 
0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691}; -constant float tc43[512] = {1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 
0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, 
-0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 
6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691, 1, 0.7071067691, 6.123234263e-17, -0.7071067691}; -constant float tc41[512] = {1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 
0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 
0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 
0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261}; -constant float tc44[512] = {1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 
0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 
0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 
0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261, 1, 0.9238795042, 0.7071067691, 0.3826834261}; -constant float tc42[512] = {1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 
0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, 
-0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, 
-0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042}; -constant float tc45[512] = {1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, 
-0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, 
-0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 
0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042, 1, 0.3826834261, -0.7071067691, -0.9238795042}; - - - -constant float ts00[512] = {-0, -0.003067956772, -0.006135884672, -0.009203754365, -0.01227153838, -0.01533920597, -0.01840673015, -0.02147408016, -0.02454122901, -0.02760814503, -0.030674804, -0.0337411724, -0.03680722415, -0.03987292573, -0.0429382585, -0.04600318149, -0.04906767607, -0.05213170499, -0.05519524589, -0.05825826526, -0.061320737, -0.06438262761, -0.06744392216, -0.07050457597, -0.07356456667, -0.07662386447, -0.07968243957, -0.08274026215, -0.08579730988, -0.08885355294, -0.09190895408, -0.09496349841, -0.09801714122, -0.1010698602, -0.1041216329, -0.1071724221, -0.1102222055, -0.1132709533, -0.1163186282, -0.1193652153, -0.1224106774, -0.1254549772, -0.1284981072, -0.1315400302, -0.1345807016, -0.1376201212, -0.1406582445, -0.1436950266, -0.1467304677, -0.1497645378, -0.1527971923, -0.1558284014, -0.1588581502, -0.161886394, 
-0.1649131179, -0.167938292, -0.1709618866, -0.1739838719, -0.1770042181, -0.1800228953, -0.1830398887, -0.1860551536, -0.1890686601, -0.1920803934, -0.1950903237, -0.1980984062, -0.201104641, -0.2041089684, -0.2071113735, -0.2101118416, -0.2131103128, -0.2161068022, -0.2191012353, -0.2220936269, -0.2250839174, -0.228072077, -0.2310581058, -0.234041959, -0.2370236069, -0.2400030196, -0.2429801822, -0.24595505, -0.2489276081, -0.2518978119, -0.2548656464, -0.2578310966, -0.2607941031, -0.2637546659, -0.266712755, -0.2696683109, -0.2726213634, -0.2755718231, -0.27851969, -0.2814649343, -0.2844075263, -0.2873474658, -0.2902846634, -0.2932191491, -0.296150893, -0.2990798354, -0.3020059466, -0.3049292266, -0.3078496456, -0.310767144, -0.3136817515, -0.3165933788, -0.3195020258, -0.3224076927, -0.3253102899, -0.3282098472, -0.3311063051, -0.3339996636, -0.336889863, -0.3397768736, -0.3426607251, -0.3455413282, -0.3484186828, -0.3512927592, -0.3541635275, -0.3570309579, -0.3598950505, -0.3627557158, -0.3656129837, -0.3684668243, -0.3713172078, -0.3741640747, -0.3770074248, -0.3798471987, -0.3826834261, -0.3855160475, -0.3883450329, -0.3911703825, -0.3939920366, -0.3968099952, -0.3996241987, -0.4024346471, -0.4052413106, -0.4080441594, -0.4108431637, -0.4136383235, -0.4164295495, -0.4192169011, -0.4220002592, -0.4247796834, -0.4275550842, -0.4303264916, -0.433093816, -0.4358570874, -0.438616246, -0.4413712621, -0.4441221356, -0.4468688369, -0.449611336, -0.4523495734, -0.4550835788, -0.4578132927, -0.4605387151, -0.4632597864, -0.4659765065, -0.4686888158, -0.4713967443, -0.4741002023, -0.4767992198, -0.479493767, -0.4821837842, -0.4848692417, -0.4875501692, -0.4902264774, -0.492898196, -0.4955652654, -0.4982276559, -0.5008853674, -0.5035383701, -0.5061866641, -0.5088301301, -0.5114688277, -0.514102757, -0.5167317986, -0.5193560123, -0.5219752789, -0.5245896578, -0.5271991491, -0.5298036337, -0.5324031115, -0.534997642, -0.5375870466, -0.5401714444, -0.5427507758, 
-0.5453249812, -0.5478940606, -0.5504579544, -0.5530167222, -0.5555702448, -0.5581185222, -0.5606615543, -0.5631993413, -0.5657318234, -0.5682589412, -0.5707807541, -0.573297143, -0.5758081675, -0.5783137679, -0.5808139443, -0.5833086371, -0.5857978463, -0.5882815719, -0.5907596946, -0.5932322741, -0.5956993103, -0.5981606841, -0.6006164551, -0.6030666232, -0.6055110693, -0.6079497933, -0.6103827953, -0.6128100753, -0.6152315736, -0.6176472902, -0.6200572252, -0.6224612594, -0.6248595119, -0.6272518039, -0.6296382546, -0.6320187449, -0.6343932748, -0.6367618442, -0.6391244531, -0.6414810419, -0.6438315511, -0.6461760402, -0.64851439, -0.6508466601, -0.6531728506, -0.6554928422, -0.6578066945, -0.6601143479, -0.6624158025, -0.6647109985, -0.6669999361, -0.6692826152, -0.6715589762, -0.6738290191, -0.6760926843, -0.6783500314, -0.6806010008, -0.6828455329, -0.6850836873, -0.6873153448, -0.689540565, -0.6917592287, -0.6939714551, -0.696177125, -0.6983762383, -0.7005687952, -0.7027547359, -0.7049340606, -0.7071067691, -0.7092728019, -0.7114322186, -0.7135848403, -0.7157308459, -0.7178700566, -0.720002532, -0.7221282125, -0.724247098, -0.726359129, -0.728464365, -0.7305627465, -0.7326542735, -0.7347388864, -0.7368165851, -0.73888731, -0.7409511209, -0.7430079579, -0.7450577617, -0.7471005917, -0.7491363883, -0.7511651516, -0.7531868219, -0.7552013993, -0.7572088242, -0.7592092156, -0.761202395, -0.7631884217, -0.7651672363, -0.7671388984, -0.7691033483, -0.7710605264, -0.7730104327, -0.7749531269, -0.7768884897, -0.7788165212, -0.7807372212, -0.7826505899, -0.7845565677, -0.786455214, -0.7883464098, -0.7902302146, -0.7921065688, -0.7939754725, -0.7958369255, -0.7976908684, -0.7995372415, -0.801376164, -0.8032075167, -0.8050313592, -0.8068475723, -0.8086561561, -0.81045717, -0.8122506142, -0.8140363097, -0.8158144355, -0.8175848126, -0.8193475008, -0.8211025, -0.8228498101, -0.8245893121, -0.8263210654, -0.8280450702, -0.8297612071, -0.8314695954, -0.8331701756, 
-0.8348628879, -0.8365477324, -0.838224709, -0.8398938179, -0.8415549994, -0.8432082534, -0.84485358, -0.8464909196, -0.8481203318, -0.8497417569, -0.851355195, -0.8529605865, -0.854557991, -0.8561473489, -0.8577286005, -0.8593018055, -0.8608669639, -0.8624239564, -0.8639728427, -0.8655136228, -0.867046237, -0.8685706854, -0.8700869679, -0.8715950847, -0.8730949759, -0.8745866418, -0.8760700822, -0.8775452971, -0.8790122271, -0.8804708719, -0.8819212914, -0.8833633661, -0.8847970963, -0.8862225413, -0.8876396418, -0.8890483379, -0.8904487491, -0.8918406963, -0.893224299, -0.8945994973, -0.8959662318, -0.8973245621, -0.8986744881, -0.9000158906, -0.9013488293, -0.9026733041, -0.903989315, -0.9052967429, -0.9065957069, -0.9078860879, -0.909168005, -0.9104412794, -0.9117060304, -0.9129621983, -0.9142097831, -0.9154487252, -0.9166790843, -0.9179008007, -0.9191138744, -0.9203183055, -0.9215140343, -0.9227011204, -0.9238795042, -0.9250492454, -0.9262102246, -0.9273625016, -0.9285060763, -0.9296408892, -0.9307669401, -0.9318842888, -0.932992816, -0.9340925217, -0.9351835251, -0.9362656474, -0.9373390079, -0.9384035468, -0.9394592047, -0.940506041, -0.9415440559, -0.9425731897, -0.9435934424, -0.9446048141, -0.9456073046, -0.946600914, -0.9475855827, -0.9485613704, -0.9495281577, -0.950486064, -0.9514350295, -0.9523749948, -0.9533060193, -0.9542281032, -0.9551411867, -0.95604527, -0.9569403529, -0.9578264356, -0.9587034583, -0.9595715404, -0.9604305029, -0.9612804651, -0.9621214271, -0.9629532695, -0.963776052, -0.9645897746, -0.9653944373, -0.9661899805, -0.9669764638, -0.9677538276, -0.9685220718, -0.9692812562, -0.9700312614, -0.9707721472, -0.9715039134, -0.9722265005, -0.9729399681, -0.9736442566, -0.974339366, -0.9750253558, -0.975702107, -0.9763697386, -0.9770281315, -0.9776773453, -0.97831738, -0.9789481759, -0.9795697927, -0.9801821113, -0.9807852507, -0.9813792109, -0.9819638729, -0.9825392962, -0.9831054807, -0.9836624265, -0.9842100739, -0.9847484827, 
-0.9852776527, -0.9857975245, -0.9863080978, -0.9868093729, -0.9873014092, -0.9877841473, -0.988257587, -0.9887216687, -0.9891765118, -0.9896219969, -0.9900581837, -0.9904850721, -0.9909026623, -0.9913108349, -0.9917097688, -0.9920992851, -0.9924795628, -0.9928504229, -0.993211925, -0.9935641289, -0.9939069748, -0.9942404628, -0.9945645928, -0.9948793054, -0.9951847196, -0.9954807758, -0.9957674146, -0.9960446954, -0.9963126183, -0.9965711236, -0.996820271, -0.9970600605, -0.9972904325, -0.9975114465, -0.997723043, -0.9979252815, -0.9981181026, -0.9983015656, -0.9984755516, -0.9986402392, -0.9987954497, -0.9989413023, -0.9990777373, -0.9992047548, -0.9993223548, -0.9994305968, -0.9995294213, -0.9996188283, -0.9996988177, -0.9997693896, -0.9998306036, -0.9998823404, -0.9999247193, -0.9999576211, -0.9999811649, -0.9999952912}; -constant float ts03[512] = {-1, -0.9999952912, -0.9999811649, -0.9999576211, -0.9999247193, -0.9998823404, -0.9998306036, -0.9997693896, -0.9996988177, -0.9996188283, -0.9995294213, -0.9994305968, -0.9993223548, -0.9992047548, -0.9990777373, -0.9989413023, -0.9987954497, -0.9986402392, -0.9984755516, -0.9983015656, -0.9981181026, -0.9979252815, -0.997723043, -0.9975114465, -0.9972904325, -0.9970600605, -0.996820271, -0.9965711236, -0.9963126183, -0.9960446954, -0.9957674146, -0.9954807758, -0.9951847196, -0.9948793054, -0.9945645928, -0.9942404628, -0.9939069748, -0.9935641289, -0.993211925, -0.9928504229, -0.9924795628, -0.9920992851, -0.9917097688, -0.9913108349, -0.9909026623, -0.9904850721, -0.9900581837, -0.9896219969, -0.9891765118, -0.9887216687, -0.988257587, -0.9877841473, -0.9873014092, -0.9868093729, -0.9863080978, -0.9857975245, -0.9852776527, -0.9847484827, -0.9842100739, -0.9836624265, -0.9831054807, -0.9825392962, -0.9819638729, -0.9813792109, -0.9807852507, -0.9801821113, -0.9795697927, -0.9789481759, -0.97831738, -0.9776773453, -0.9770281315, -0.9763697386, -0.975702107, -0.9750253558, -0.974339366, -0.9736442566, 
-0.9729399681, -0.9722265005, -0.9715039134, -0.9707721472, -0.9700312614, -0.9692812562, -0.9685220718, -0.9677538276, -0.9669764638, -0.9661899805, -0.9653944373, -0.9645897746, -0.963776052, -0.9629532695, -0.9621214271, -0.9612804651, -0.9604305029, -0.9595715404, -0.9587034583, -0.9578264356, -0.9569403529, -0.95604527, -0.9551411867, -0.9542281032, -0.9533060193, -0.9523749948, -0.9514350295, -0.950486064, -0.9495281577, -0.9485613704, -0.9475855827, -0.946600914, -0.9456073046, -0.9446048141, -0.9435934424, -0.9425731897, -0.9415440559, -0.940506041, -0.9394592047, -0.9384035468, -0.9373390079, -0.9362656474, -0.9351835251, -0.9340925217, -0.932992816, -0.9318842888, -0.9307669401, -0.9296408892, -0.9285060763, -0.9273625016, -0.9262102246, -0.9250492454, -0.9238795042, -0.9227011204, -0.9215140343, -0.9203183055, -0.9191138744, -0.9179008007, -0.9166790843, -0.9154487252, -0.9142097831, -0.9129621983, -0.9117060304, -0.9104412794, -0.909168005, -0.9078860879, -0.9065957069, -0.9052967429, -0.903989315, -0.9026733041, -0.9013488293, -0.9000158906, -0.8986744881, -0.8973245621, -0.8959662318, -0.8945994973, -0.893224299, -0.8918406963, -0.8904487491, -0.8890483379, -0.8876396418, -0.8862225413, -0.8847970963, -0.8833633661, -0.8819212914, -0.8804708719, -0.8790122271, -0.8775452971, -0.8760700822, -0.8745866418, -0.8730949759, -0.8715950847, -0.8700869679, -0.8685706854, -0.867046237, -0.8655136228, -0.8639728427, -0.8624239564, -0.8608669639, -0.8593018055, -0.8577286005, -0.8561473489, -0.854557991, -0.8529605865, -0.851355195, -0.8497417569, -0.8481203318, -0.8464909196, -0.84485358, -0.8432082534, -0.8415549994, -0.8398938179, -0.838224709, -0.8365477324, -0.8348628879, -0.8331701756, -0.8314695954, -0.8297612071, -0.8280450702, -0.8263210654, -0.8245893121, -0.8228498101, -0.8211025, -0.8193475008, -0.8175848126, -0.8158144355, -0.8140363097, -0.8122506142, -0.81045717, -0.8086561561, -0.8068475723, -0.8050313592, -0.8032075167, -0.801376164, 
-0.7995372415, -0.7976908684, -0.7958369255, -0.7939754725, -0.7921065688, -0.7902302146, -0.7883464098, -0.786455214, -0.7845565677, -0.7826505899, -0.7807372212, -0.7788165212, -0.7768884897, -0.7749531269, -0.7730104327, -0.7710605264, -0.7691033483, -0.7671388984, -0.7651672363, -0.7631884217, -0.761202395, -0.7592092156, -0.7572088242, -0.7552013993, -0.7531868219, -0.7511651516, -0.7491363883, -0.7471005917, -0.7450577617, -0.7430079579, -0.7409511209, -0.73888731, -0.7368165851, -0.7347388864, -0.7326542735, -0.7305627465, -0.728464365, -0.726359129, -0.724247098, -0.7221282125, -0.720002532, -0.7178700566, -0.7157308459, -0.7135848403, -0.7114322186, -0.7092728019, -0.7071067691, -0.7049340606, -0.7027547359, -0.7005687952, -0.6983762383, -0.696177125, -0.6939714551, -0.6917592287, -0.689540565, -0.6873153448, -0.6850836873, -0.6828455329, -0.6806010008, -0.6783500314, -0.6760926843, -0.6738290191, -0.6715589762, -0.6692826152, -0.6669999361, -0.6647109985, -0.6624158025, -0.6601143479, -0.6578066945, -0.6554928422, -0.6531728506, -0.6508466601, -0.64851439, -0.6461760402, -0.6438315511, -0.6414810419, -0.6391244531, -0.6367618442, -0.6343932748, -0.6320187449, -0.6296382546, -0.6272518039, -0.6248595119, -0.6224612594, -0.6200572252, -0.6176472902, -0.6152315736, -0.6128100753, -0.6103827953, -0.6079497933, -0.6055110693, -0.6030666232, -0.6006164551, -0.5981606841, -0.5956993103, -0.5932322741, -0.5907596946, -0.5882815719, -0.5857978463, -0.5833086371, -0.5808139443, -0.5783137679, -0.5758081675, -0.573297143, -0.5707807541, -0.5682589412, -0.5657318234, -0.5631993413, -0.5606615543, -0.5581185222, -0.5555702448, -0.5530167222, -0.5504579544, -0.5478940606, -0.5453249812, -0.5427507758, -0.5401714444, -0.5375870466, -0.534997642, -0.5324031115, -0.5298036337, -0.5271991491, -0.5245896578, -0.5219752789, -0.5193560123, -0.5167317986, -0.514102757, -0.5114688277, -0.5088301301, -0.5061866641, -0.5035383701, -0.5008853674, -0.4982276559, -0.4955652654, 
-0.492898196, -0.4902264774, -0.4875501692, -0.4848692417, -0.4821837842, -0.479493767, -0.4767992198, -0.4741002023, -0.4713967443, -0.4686888158, -0.4659765065, -0.4632597864, -0.4605387151, -0.4578132927, -0.4550835788, -0.4523495734, -0.449611336, -0.4468688369, -0.4441221356, -0.4413712621, -0.438616246, -0.4358570874, -0.433093816, -0.4303264916, -0.4275550842, -0.4247796834, -0.4220002592, -0.4192169011, -0.4164295495, -0.4136383235, -0.4108431637, -0.4080441594, -0.4052413106, -0.4024346471, -0.3996241987, -0.3968099952, -0.3939920366, -0.3911703825, -0.3883450329, -0.3855160475, -0.3826834261, -0.3798471987, -0.3770074248, -0.3741640747, -0.3713172078, -0.3684668243, -0.3656129837, -0.3627557158, -0.3598950505, -0.3570309579, -0.3541635275, -0.3512927592, -0.3484186828, -0.3455413282, -0.3426607251, -0.3397768736, -0.336889863, -0.3339996636, -0.3311063051, -0.3282098472, -0.3253102899, -0.3224076927, -0.3195020258, -0.3165933788, -0.3136817515, -0.310767144, -0.3078496456, -0.3049292266, -0.3020059466, -0.2990798354, -0.296150893, -0.2932191491, -0.2902846634, -0.2873474658, -0.2844075263, -0.2814649343, -0.27851969, -0.2755718231, -0.2726213634, -0.2696683109, -0.266712755, -0.2637546659, -0.2607941031, -0.2578310966, -0.2548656464, -0.2518978119, -0.2489276081, -0.24595505, -0.2429801822, -0.2400030196, -0.2370236069, -0.234041959, -0.2310581058, -0.228072077, -0.2250839174, -0.2220936269, -0.2191012353, -0.2161068022, -0.2131103128, -0.2101118416, -0.2071113735, -0.2041089684, -0.201104641, -0.1980984062, -0.1950903237, -0.1920803934, -0.1890686601, -0.1860551536, -0.1830398887, -0.1800228953, -0.1770042181, -0.1739838719, -0.1709618866, -0.167938292, -0.1649131179, -0.161886394, -0.1588581502, -0.1558284014, -0.1527971923, -0.1497645378, -0.1467304677, -0.1436950266, -0.1406582445, -0.1376201212, -0.1345807016, -0.1315400302, -0.1284981072, -0.1254549772, -0.1224106774, -0.1193652153, -0.1163186282, -0.1132709533, -0.1102222055, -0.1071724221, 
-0.1041216329, -0.1010698602, -0.09801714122, -0.09496349841, -0.09190895408, -0.08885355294, -0.08579730988, -0.08274026215, -0.07968243957, -0.07662386447, -0.07356456667, -0.07050457597, -0.06744392216, -0.06438262761, -0.061320737, -0.05825826526, -0.05519524589, -0.05213170499, -0.04906767607, -0.04600318149, -0.0429382585, -0.03987292573, -0.03680722415, -0.0337411724, -0.030674804, -0.02760814503, -0.02454122901, -0.02147408016, -0.01840673015, -0.01533920597, -0.01227153838, -0.009203754365, -0.006135884672, -0.003067956772}; -constant float ts01[512] = {-0, -0.001533980132, -0.003067956772, -0.004601926077, -0.006135884672, -0.007669828832, -0.009203754365, -0.01073765941, -0.01227153838, -0.01380538847, -0.01533920597, -0.01687298715, -0.01840673015, -0.01994042844, -0.02147408016, -0.02300768159, -0.02454122901, -0.02607471868, -0.02760814503, -0.02914150804, -0.030674804, -0.03220802546, -0.0337411724, -0.03527423739, -0.03680722415, -0.03834012151, -0.03987292573, -0.04140564054, -0.0429382585, -0.04447077215, -0.04600318149, -0.04753548279, -0.04906767607, -0.05059975013, -0.05213170499, -0.05366353691, -0.05519524589, -0.05672682077, -0.05825826526, -0.05978957191, -0.061320737, -0.06285175681, -0.06438262761, -0.06591334939, -0.06744392216, -0.06897433102, -0.07050457597, -0.07203464955, -0.07356456667, -0.07509429753, -0.07662386447, -0.07815324515, -0.07968243957, -0.08121144772, -0.08274026215, -0.08426889032, -0.08579730988, -0.08732553571, -0.08885355294, -0.09038136154, -0.09190895408, -0.09343633801, -0.09496349841, -0.09649042785, -0.09801714122, -0.09954361618, -0.1010698602, -0.1025958657, -0.1041216329, -0.1056471542, -0.1071724221, -0.1086974442, -0.1102222055, -0.1117467135, -0.1132709533, -0.1147949249, -0.1163186282, -0.1178420633, -0.1193652153, -0.1208880842, -0.1224106774, -0.1239329726, -0.1254549772, -0.1269766986, -0.1284981072, -0.1300192177, -0.1315400302, -0.1330605298, -0.1345807016, -0.1361005753, -0.1376201212, 
-0.1391393393, -0.1406582445, -0.1421768069, -0.1436950266, -0.1452129185, -0.1467304677, -0.1482476741, -0.1497645378, -0.1512810439, -0.1527971923, -0.1543129683, -0.1558284014, -0.1573434621, -0.1588581502, -0.1603724509, -0.161886394, -0.1633999497, -0.1649131179, -0.1664258987, -0.167938292, -0.169450298, -0.1709618866, -0.1724730879, -0.1739838719, -0.1754942536, -0.1770042181, -0.1785137653, -0.1800228953, -0.1815316081, -0.1830398887, -0.1845477372, -0.1860551536, -0.1875621229, -0.1890686601, -0.1905747503, -0.1920803934, -0.1935855895, -0.1950903237, -0.1965945959, -0.1980984062, -0.1996017545, -0.201104641, -0.2026070356, -0.2041089684, -0.2056104094, -0.2071113735, -0.208611846, -0.2101118416, -0.2116113305, -0.2131103128, -0.2146088183, -0.2161068022, -0.2176042795, -0.2191012353, -0.2205976844, -0.2220936269, -0.2235890329, -0.2250839174, -0.2265782654, -0.228072077, -0.2295653671, -0.2310581058, -0.2325503081, -0.234041959, -0.2355330586, -0.2370236069, -0.2385135889, -0.2400030196, -0.241491884, -0.2429801822, -0.2444678992, -0.24595505, -0.2474416196, -0.2489276081, -0.2504130006, -0.2518978119, -0.2533820271, -0.2548656464, -0.2563486695, -0.2578310966, -0.2593129277, -0.2607941031, -0.2622747123, -0.2637546659, -0.2652340233, -0.266712755, -0.2681908607, -0.2696683109, -0.271145165, -0.2726213634, -0.2740969062, -0.2755718231, -0.2770460844, -0.27851969, -0.27999264, -0.2814649343, -0.282936573, -0.2844075263, -0.2858778238, -0.2873474658, -0.2888164222, -0.2902846634, -0.291752249, -0.2932191491, -0.2946853638, -0.296150893, -0.2976157069, -0.2990798354, -0.3005432487, -0.3020059466, -0.3034679592, -0.3049292266, -0.3063898087, -0.3078496456, -0.3093087673, -0.310767144, -0.3122248054, -0.3136817515, -0.3151379228, -0.3165933788, -0.3180480897, -0.3195020258, -0.3209552467, -0.3224076927, -0.3238593638, -0.3253102899, -0.3267604411, -0.3282098472, -0.3296584487, -0.3311063051, -0.3325533569, -0.3339996636, -0.3354451358, -0.336889863, 
-0.3383337557, -0.3397768736, -0.3412192166, -0.3426607251, -0.344101429, -0.3455413282, -0.3469804227, -0.3484186828, -0.3498561382, -0.3512927592, -0.3527285457, -0.3541635275, -0.3555976748, -0.3570309579, -0.3584634066, -0.3598950505, -0.3613258004, -0.3627557158, -0.3641847968, -0.3656129837, -0.3670403361, -0.3684668243, -0.3698924482, -0.3713172078, -0.3727410734, -0.3741640747, -0.3755861819, -0.3770074248, -0.3784277439, -0.3798471987, -0.3812657595, -0.3826834261, -0.3841001987, -0.3855160475, -0.3869310021, -0.3883450329, -0.3897581697, -0.3911703825, -0.3925816715, -0.3939920366, -0.3954014778, -0.3968099952, -0.3982175589, -0.3996241987, -0.4010298848, -0.4024346471, -0.4038384557, -0.4052413106, -0.4066432118, -0.4080441594, -0.4094441533, -0.4108431637, -0.4122412205, -0.4136383235, -0.4150344133, -0.4164295495, -0.4178237021, -0.4192169011, -0.4206090868, -0.4220002592, -0.4233904779, -0.4247796834, -0.4261678755, -0.4275550842, -0.4289412796, -0.4303264916, -0.4317106605, -0.433093816, -0.4344759583, -0.4358570874, -0.4372371733, -0.438616246, -0.4399942756, -0.4413712621, -0.4427472353, -0.4441221356, -0.4454960227, -0.4468688369, -0.448240608, -0.449611336, -0.4509809911, -0.4523495734, -0.4537171125, -0.4550835788, -0.4564489722, -0.4578132927, -0.4591765404, -0.4605387151, -0.4618997872, -0.4632597864, -0.4646186829, -0.4659765065, -0.4673331976, -0.4686888158, -0.4700433314, -0.4713967443, -0.4727490246, -0.4741002023, -0.4754502773, -0.4767992198, -0.4781470597, -0.479493767, -0.4808393419, -0.4821837842, -0.4835270643, -0.4848692417, -0.4862102866, -0.4875501692, -0.4888888896, -0.4902264774, -0.4915629029, -0.492898196, -0.4942322969, -0.4955652654, -0.4968970418, -0.4982276559, -0.4995571077, -0.5008853674, -0.5022124648, -0.5035383701, -0.5048630834, -0.5061866641, -0.5075089931, -0.5088301301, -0.510150075, -0.5114688277, -0.5127863884, -0.514102757, -0.5154178739, -0.5167317986, -0.5180445313, -0.5193560123, -0.5206662416, 
-0.5219752789, -0.523283124, -0.5245896578, -0.5258949995, -0.5271991491, -0.5285019875, -0.5298036337, -0.5311040282, -0.5324031115, -0.5337010026, -0.534997642, -0.5362929702, -0.5375870466, -0.538879931, -0.5401714444, -0.5414617658, -0.5427507758, -0.5440385342, -0.5453249812, -0.5466101766, -0.5478940606, -0.5491766334, -0.5504579544, -0.5517379642, -0.5530167222, -0.5542941093, -0.5555702448, -0.5568450093, -0.5581185222, -0.5593907237, -0.5606615543, -0.5619311333, -0.5631993413, -0.564466238, -0.5657318234, -0.566996038, -0.5682589412, -0.5695205331, -0.5707807541, -0.5720396042, -0.573297143, -0.5745533705, -0.5758081675, -0.5770616531, -0.5783137679, -0.5795645714, -0.5808139443, -0.582062006, -0.5833086371, -0.584553957, -0.5857978463, -0.5870403647, -0.5882815719, -0.5895212889, -0.5907596946, -0.5919966698, -0.5932322741, -0.5944665074, -0.5956993103, -0.5969306827, -0.5981606841, -0.5993893147, -0.6006164551, -0.6018422246, -0.6030666232, -0.6042895317, -0.6055110693, -0.6067311168, -0.6079497933, -0.6091670394, -0.6103827953, -0.6115971804, -0.6128100753, -0.6140215397, -0.6152315736, -0.616440177, -0.6176472902, -0.618852973, -0.6200572252, -0.6212599874, -0.6224612594, -0.6236611009, -0.6248595119, -0.6260563731, -0.6272518039, -0.6284457445, -0.6296382546, -0.630829215, -0.6320187449, -0.6332067847, -0.6343932748, -0.6355783343, -0.6367618442, -0.6379439235, -0.6391244531, -0.6403034925, -0.6414810419, -0.6426570415, -0.6438315511, -0.6450045109, -0.6461760402, -0.6473459601, -0.64851439, -0.6496813297, -0.6508466601, -0.65201056, -0.6531728506, -0.6543335915, -0.6554928422, -0.6566505432, -0.6578066945, -0.6589612961, -0.6601143479, -0.6612658501, -0.6624158025, -0.6635641456, -0.6647109985, -0.6658562422, -0.6669999361, -0.6681420207, -0.6692826152, -0.6704215407, -0.6715589762, -0.6726947427, -0.6738290191, -0.6749616265, -0.6760926843, -0.6772221923, -0.6783500314, -0.6794763207, -0.6806010008, -0.6817240715, -0.6828455329, -0.683965385, 
-0.6850836873, -0.6862003207, -0.6873153448, -0.6884287596, -0.689540565, -0.6906507015, -0.6917592287, -0.6928661466, -0.6939714551, -0.6950750947, -0.696177125, -0.6972774863, -0.6983762383, -0.6994733214, -0.7005687952, -0.7016626, -0.7027547359, -0.7038452625, -0.7049340606, -0.7060212493}; -constant float ts04[512] = {-0.7071067691, -0.7081906199, -0.7092728019, -0.7103533745, -0.7114322186, -0.7125093937, -0.7135848403, -0.7146586776, -0.7157308459, -0.7168012857, -0.7178700566, -0.718937099, -0.720002532, -0.7210661769, -0.7221282125, -0.7231884599, -0.724247098, -0.7253039479, -0.726359129, -0.727412641, -0.728464365, -0.72951442, -0.7305627465, -0.7316094041, -0.7326542735, -0.7336974144, -0.7347388864, -0.7357785702, -0.7368165851, -0.7378528118, -0.73888731, -0.7399200797, -0.7409511209, -0.7419804335, -0.7430079579, -0.7440337539, -0.7450577617, -0.7460801005, -0.7471005917, -0.7481193542, -0.7491363883, -0.7501516342, -0.7511651516, -0.7521768212, -0.7531868219, -0.7541949749, -0.7552013993, -0.756205976, -0.7572088242, -0.7582098842, -0.7592092156, -0.7602066994, -0.761202395, -0.7621963024, -0.7631884217, -0.7641787529, -0.7651672363, -0.7661539912, -0.7671388984, -0.7681220174, -0.7691033483, -0.7700828314, -0.7710605264, -0.7720363736, -0.7730104327, -0.7739827037, -0.7749531269, -0.7759217024, -0.7768884897, -0.7778534293, -0.7788165212, -0.7797777653, -0.7807372212, -0.7816948295, -0.7826505899, -0.7836045027, -0.7845565677, -0.7855068445, -0.786455214, -0.7874017358, -0.7883464098, -0.7892892361, -0.7902302146, -0.7911693454, -0.7921065688, -0.7930419445, -0.7939754725, -0.7949071527, -0.7958369255, -0.796764791, -0.7976908684, -0.7986149788, -0.7995372415, -0.8004576564, -0.801376164, -0.8022928238, -0.8032075167, -0.8041203618, -0.8050313592, -0.8059403896, -0.8068475723, -0.8077528477, -0.8086561561, -0.8095576167, -0.81045717, -0.8113548756, -0.8122506142, -0.8131443858, -0.8140363097, -0.8149263263, -0.8158144355, -0.8167005777, 
-0.8175848126, -0.8184671402, -0.8193475008, -0.8202259541, -0.8211025, -0.8219771385, -0.8228498101, -0.8237205148, -0.8245893121, -0.8254561424, -0.8263210654, -0.8271840215, -0.8280450702, -0.8289040923, -0.8297612071, -0.8306164145, -0.8314695954, -0.832320869, -0.8331701756, -0.8340175152, -0.8348628879, -0.8357062936, -0.8365477324, -0.8373872042, -0.838224709, -0.8390602469, -0.8398938179, -0.8407253623, -0.8415549994, -0.8423826098, -0.8432082534, -0.8440318704, -0.84485358, -0.8456732631, -0.8464909196, -0.8473066092, -0.8481203318, -0.8489320278, -0.8497417569, -0.8505494595, -0.851355195, -0.8521589041, -0.8529605865, -0.8537603021, -0.854557991, -0.8553536534, -0.8561473489, -0.8569389582, -0.8577286005, -0.8585162163, -0.8593018055, -0.8600853682, -0.8608669639, -0.8616464734, -0.8624239564, -0.8631994128, -0.8639728427, -0.864744246, -0.8655136228, -0.866280973, -0.867046237, -0.8678094745, -0.8685706854, -0.8693298697, -0.8700869679, -0.8708420396, -0.8715950847, -0.8723460436, -0.8730949759, -0.8738418221, -0.8745866418, -0.8753293753, -0.8760700822, -0.8768087029, -0.8775452971, -0.8782798052, -0.8790122271, -0.8797426224, -0.8804708719, -0.8811970949, -0.8819212914, -0.882643342, -0.8833633661, -0.8840812445, -0.8847970963, -0.8855108619, -0.8862225413, -0.8869321346, -0.8876396418, -0.8883450627, -0.8890483379, -0.8897495866, -0.8904487491, -0.8911457658, -0.8918406963, -0.8925335407, -0.893224299, -0.893912971, -0.8945994973, -0.8952839375, -0.8959662318, -0.8966464996, -0.8973245621, -0.898000598, -0.8986744881, -0.8993462324, -0.9000158906, -0.900683403, -0.9013488293, -0.9020121694, -0.9026733041, -0.9033323526, -0.903989315, -0.9046440721, -0.9052967429, -0.905947268, -0.9065957069, -0.9072420001, -0.9078860879, -0.9085280895, -0.909168005, -0.9098057151, -0.9104412794, -0.9110747576, -0.9117060304, -0.9123351574, -0.9129621983, -0.9135870337, -0.9142097831, -0.914830327, -0.9154487252, -0.9160649776, -0.9166790843, -0.9172909856, 
-0.9179008007, -0.9185084105, -0.9191138744, -0.919717133, -0.9203183055, -0.920917213, -0.9215140343, -0.9221086502, -0.9227011204, -0.9232914448, -0.9238795042, -0.9244654775, -0.9250492454, -0.9256308079, -0.9262102246, -0.9267874956, -0.9273625016, -0.9279354215, -0.9285060763, -0.9290745854, -0.9296408892, -0.9302050471, -0.9307669401, -0.9313266873, -0.9318842888, -0.9324396253, -0.932992816, -0.9335438013, -0.9340925217, -0.9346391559, -0.9351835251, -0.9357256889, -0.9362656474, -0.9368034601, -0.9373390079, -0.9378723502, -0.9384035468, -0.9389324784, -0.9394592047, -0.9399837255, -0.940506041, -0.9410261512, -0.9415440559, -0.9420597553, -0.9425731897, -0.9430844188, -0.9435934424, -0.9441002607, -0.9446048141, -0.9451072216, -0.9456073046, -0.9461052418, -0.946600914, -0.9470943809, -0.9475855827, -0.9480745792, -0.9485613704, -0.9490458965, -0.9495281577, -0.9500082731, -0.950486064, -0.9509616494, -0.9514350295, -0.9519061446, -0.9523749948, -0.9528416395, -0.9533060193, -0.9537681937, -0.9542281032, -0.9546857476, -0.9551411867, -0.9555943608, -0.95604527, -0.9564939141, -0.9569403529, -0.9573845267, -0.9578264356, -0.9582660794, -0.9587034583, -0.9591386318, -0.9595715404, -0.9600021243, -0.9604305029, -0.9608566165, -0.9612804651, -0.9617020488, -0.9621214271, -0.9625384808, -0.9629532695, -0.9633657932, -0.963776052, -0.9641840458, -0.9645897746, -0.9649932384, -0.9653944373, -0.9657933712, -0.9661899805, -0.9665843844, -0.9669764638, -0.9673662782, -0.9677538276, -0.968139112, -0.9685220718, -0.9689028263, -0.9692812562, -0.9696573615, -0.9700312614, -0.9704028368, -0.9707721472, -0.971139133, -0.9715039134, -0.9718663096, -0.9722265005, -0.9725843668, -0.9729399681, -0.9732932448, -0.9736442566, -0.9739929438, -0.974339366, -0.9746835232, -0.9750253558, -0.9753648639, -0.975702107, -0.9760370851, -0.9763697386, -0.9767000675, -0.9770281315, -0.9773538709, -0.9776773453, -0.9779984951, -0.97831738, -0.9786339402, -0.9789481759, -0.9792601466, 
-0.9795697927, -0.9798771143, -0.9801821113, -0.9804848433, -0.9807852507, -0.9810833931, -0.9813792109, -0.9816727042, -0.9819638729, -0.982252717, -0.9825392962, -0.9828235507, -0.9831054807, -0.9833850861, -0.9836624265, -0.9839374423, -0.9842100739, -0.9844804406, -0.9847484827, -0.9850142598, -0.9852776527, -0.9855387211, -0.9857975245, -0.9860539436, -0.9863080978, -0.9865599275, -0.9868093729, -0.9870565534, -0.9873014092, -0.9875439405, -0.9877841473, -0.9880220294, -0.988257587, -0.9884908199, -0.9887216687, -0.9889502525, -0.9891765118, -0.9894004464, -0.9896219969, -0.9898412824, -0.9900581837, -0.99027282, -0.9904850721, -0.9906949997, -0.9909026623, -0.9911079407, -0.9913108349, -0.9915114641, -0.9917097688, -0.9919056892, -0.9920992851, -0.992290616, -0.9924795628, -0.9926661253, -0.9928504229, -0.9930323362, -0.993211925, -0.9933891892, -0.9935641289, -0.9937367439, -0.9939069748, -0.9940748811, -0.9942404628, -0.9944036603, -0.9945645928, -0.9947231412, -0.9948793054, -0.9950332046, -0.9951847196, -0.99533391, -0.9954807758, -0.9956252575, -0.9957674146, -0.9959072471, -0.9960446954, -0.9961798191, -0.9963126183, -0.9964430332, -0.9965711236, -0.9966968894, -0.996820271, -0.996941328, -0.9970600605, -0.9971764088, -0.9972904325, -0.9974021316, -0.9975114465, -0.9976184368, -0.997723043, -0.9978253245, -0.9979252815, -0.9980228543, -0.9981181026, -0.9982110262, -0.9983015656, -0.9983897209, -0.9984755516, -0.9985590577, -0.9986402392, -0.9987190366, -0.9987954497, -0.9988695383, -0.9989413023, -0.9990106821, -0.9990777373, -0.9991424084, -0.9992047548, -0.9992647767, -0.9993223548, -0.9993776679, -0.9994305968, -0.9994812012, -0.9995294213, -0.9995753169, -0.9996188283, -0.9996600151, -0.9996988177, -0.9997352958, -0.9997693896, -0.9998011589, -0.9998306036, -0.9998576641, -0.9998823404, -0.9999046922, -0.9999247193, -0.9999423623, -0.9999576211, -0.9999706149, -0.9999811649, -0.9999893904, -0.9999952912, -0.9999988079}; -constant float ts02[512] = 
{-0, -0.004601926077, -0.009203754365, -0.01380538847, -0.01840673015, -0.02300768159, -0.02760814503, -0.03220802546, -0.03680722415, -0.04140564054, -0.04600318149, -0.05059975013, -0.05519524589, -0.05978957191, -0.06438262761, -0.06897433102, -0.07356456667, -0.07815324515, -0.08274026215, -0.08732553571, -0.09190895408, -0.09649042785, -0.1010698602, -0.1056471542, -0.1102222055, -0.1147949249, -0.1193652153, -0.1239329726, -0.1284981072, -0.1330605298, -0.1376201212, -0.1421768069, -0.1467304677, -0.1512810439, -0.1558284014, -0.1603724509, -0.1649131179, -0.169450298, -0.1739838719, -0.1785137653, -0.1830398887, -0.1875621229, -0.1920803934, -0.1965945959, -0.201104641, -0.2056104094, -0.2101118416, -0.2146088183, -0.2191012353, -0.2235890329, -0.228072077, -0.2325503081, -0.2370236069, -0.241491884, -0.24595505, -0.2504130006, -0.2548656464, -0.2593129277, -0.2637546659, -0.2681908607, -0.2726213634, -0.2770460844, -0.2814649343, -0.2858778238, -0.2902846634, -0.2946853638, -0.2990798354, -0.3034679592, -0.3078496456, -0.3122248054, -0.3165933788, -0.3209552467, -0.3253102899, -0.3296584487, -0.3339996636, -0.3383337557, -0.3426607251, -0.3469804227, -0.3512927592, -0.3555976748, -0.3598950505, -0.3641847968, -0.3684668243, -0.3727410734, -0.3770074248, -0.3812657595, -0.3855160475, -0.3897581697, -0.3939920366, -0.3982175589, -0.4024346471, -0.4066432118, -0.4108431637, -0.4150344133, -0.4192169011, -0.4233904779, -0.4275550842, -0.4317106605, -0.4358570874, -0.4399942756, -0.4441221356, -0.448240608, -0.4523495734, -0.4564489722, -0.4605387151, -0.4646186829, -0.4686888158, -0.4727490246, -0.4767992198, -0.4808393419, -0.4848692417, -0.4888888896, -0.492898196, -0.4968970418, -0.5008853674, -0.5048630834, -0.5088301301, -0.5127863884, -0.5167317986, -0.5206662416, -0.5245896578, -0.5285019875, -0.5324031115, -0.5362929702, -0.5401714444, -0.5440385342, -0.5478940606, -0.5517379642, -0.5555702448, -0.5593907237, -0.5631993413, -0.566996038, -0.5707807541, 
-0.5745533705, -0.5783137679, -0.582062006, -0.5857978463, -0.5895212889, -0.5932322741, -0.5969306827, -0.6006164551, -0.6042895317, -0.6079497933, -0.6115971804, -0.6152315736, -0.618852973, -0.6224612594, -0.6260563731, -0.6296382546, -0.6332067847, -0.6367618442, -0.6403034925, -0.6438315511, -0.6473459601, -0.6508466601, -0.6543335915, -0.6578066945, -0.6612658501, -0.6647109985, -0.6681420207, -0.6715589762, -0.6749616265, -0.6783500314, -0.6817240715, -0.6850836873, -0.6884287596, -0.6917592287, -0.6950750947, -0.6983762383, -0.7016626, -0.7049340606, -0.7081906199, -0.7114322186, -0.7146586776, -0.7178700566, -0.7210661769, -0.724247098, -0.727412641, -0.7305627465, -0.7336974144, -0.7368165851, -0.7399200797, -0.7430079579, -0.7460801005, -0.7491363883, -0.7521768212, -0.7552013993, -0.7582098842, -0.761202395, -0.7641787529, -0.7671388984, -0.7700828314, -0.7730104327, -0.7759217024, -0.7788165212, -0.7816948295, -0.7845565677, -0.7874017358, -0.7902302146, -0.7930419445, -0.7958369255, -0.7986149788, -0.801376164, -0.8041203618, -0.8068475723, -0.8095576167, -0.8122506142, -0.8149263263, -0.8175848126, -0.8202259541, -0.8228498101, -0.8254561424, -0.8280450702, -0.8306164145, -0.8331701756, -0.8357062936, -0.838224709, -0.8407253623, -0.8432082534, -0.8456732631, -0.8481203318, -0.8505494595, -0.8529605865, -0.8553536534, -0.8577286005, -0.8600853682, -0.8624239564, -0.864744246, -0.867046237, -0.8693298697, -0.8715950847, -0.8738418221, -0.8760700822, -0.8782798052, -0.8804708719, -0.882643342, -0.8847970963, -0.8869321346, -0.8890483379, -0.8911457658, -0.893224299, -0.8952839375, -0.8973245621, -0.8993462324, -0.9013488293, -0.9033323526, -0.9052967429, -0.9072420001, -0.909168005, -0.9110747576, -0.9129621983, -0.914830327, -0.9166790843, -0.9185084105, -0.9203183055, -0.9221086502, -0.9238795042, -0.9256308079, -0.9273625016, -0.9290745854, -0.9307669401, -0.9324396253, -0.9340925217, -0.9357256889, -0.9373390079, -0.9389324784, -0.940506041, 
-0.9420597553, -0.9435934424, -0.9451072216, -0.946600914, -0.9480745792, -0.9495281577, -0.9509616494, -0.9523749948, -0.9537681937, -0.9551411867, -0.9564939141, -0.9578264356, -0.9591386318, -0.9604305029, -0.9617020488, -0.9629532695, -0.9641840458, -0.9653944373, -0.9665843844, -0.9677538276, -0.9689028263, -0.9700312614, -0.971139133, -0.9722265005, -0.9732932448, -0.974339366, -0.9753648639, -0.9763697386, -0.9773538709, -0.97831738, -0.9792601466, -0.9801821113, -0.9810833931, -0.9819638729, -0.9828235507, -0.9836624265, -0.9844804406, -0.9852776527, -0.9860539436, -0.9868093729, -0.9875439405, -0.988257587, -0.9889502525, -0.9896219969, -0.99027282, -0.9909026623, -0.9915114641, -0.9920992851, -0.9926661253, -0.993211925, -0.9937367439, -0.9942404628, -0.9947231412, -0.9951847196, -0.9956252575, -0.9960446954, -0.9964430332, -0.996820271, -0.9971764088, -0.9975114465, -0.9978253245, -0.9981181026, -0.9983897209, -0.9986402392, -0.9988695383, -0.9990777373, -0.9992647767, -0.9994305968, -0.9995753169, -0.9996988177, -0.9998011589, -0.9998823404, -0.9999423623, -0.9999811649, -0.9999988079, -0.9999952912, -0.9999706149, -0.9999247193, -0.9998576641, -0.9997693896, -0.9996600151, -0.9995294213, -0.9993776679, -0.9992047548, -0.9990106821, -0.9987954497, -0.9985590577, -0.9983015656, -0.9980228543, -0.997723043, -0.9974021316, -0.9970600605, -0.9966968894, -0.9963126183, -0.9959072471, -0.9954807758, -0.9950332046, -0.9945645928, -0.9940748811, -0.9935641289, -0.9930323362, -0.9924795628, -0.9919056892, -0.9913108349, -0.9906949997, -0.9900581837, -0.9894004464, -0.9887216687, -0.9880220294, -0.9873014092, -0.9865599275, -0.9857975245, -0.9850142598, -0.9842100739, -0.9833850861, -0.9825392962, -0.9816727042, -0.9807852507, -0.9798771143, -0.9789481759, -0.9779984951, -0.9770281315, -0.9760370851, -0.9750253558, -0.9739929438, -0.9729399681, -0.9718663096, -0.9707721472, -0.9696573615, -0.9685220718, -0.9673662782, -0.9661899805, -0.9649932384, -0.963776052, 
-0.9625384808, -0.9612804651, -0.9600021243, -0.9587034583, -0.9573845267, -0.95604527, -0.9546857476, -0.9533060193, -0.9519061446, -0.950486064, -0.9490458965, -0.9475855827, -0.9461052418, -0.9446048141, -0.9430844188, -0.9415440559, -0.9399837255, -0.9384035468, -0.9368034601, -0.9351835251, -0.9335438013, -0.9318842888, -0.9302050471, -0.9285060763, -0.9267874956, -0.9250492454, -0.9232914448, -0.9215140343, -0.919717133, -0.9179008007, -0.9160649776, -0.9142097831, -0.9123351574, -0.9104412794, -0.9085280895, -0.9065957069, -0.9046440721, -0.9026733041, -0.900683403, -0.8986744881, -0.8966464996, -0.8945994973, -0.8925335407, -0.8904487491, -0.8883450627, -0.8862225413, -0.8840812445, -0.8819212914, -0.8797426224, -0.8775452971, -0.8753293753, -0.8730949759, -0.8708420396, -0.8685706854, -0.866280973, -0.8639728427, -0.8616464734, -0.8593018055, -0.8569389582, -0.854557991, -0.8521589041, -0.8497417569, -0.8473066092, -0.84485358, -0.8423826098, -0.8398938179, -0.8373872042, -0.8348628879, -0.832320869, -0.8297612071, -0.8271840215, -0.8245893121, -0.8219771385, -0.8193475008, -0.8167005777, -0.8140363097, -0.8113548756, -0.8086561561, -0.8059403896, -0.8032075167, -0.8004576564, -0.7976908684, -0.7949071527, -0.7921065688, -0.7892892361, -0.786455214, -0.7836045027, -0.7807372212, -0.7778534293, -0.7749531269, -0.7720363736, -0.7691033483, -0.7661539912, -0.7631884217, -0.7602066994, -0.7572088242, -0.7541949749, -0.7511651516, -0.7481193542, -0.7450577617, -0.7419804335, -0.73888731, -0.7357785702, -0.7326542735, -0.72951442, -0.726359129, -0.7231884599, -0.720002532, -0.7168012857, -0.7135848403, -0.7103533745}; -constant float ts05[512] = {-0.7071067691, -0.7038452625, -0.7005687952, -0.6972774863, -0.6939714551, -0.6906507015, -0.6873153448, -0.683965385, -0.6806010008, -0.6772221923, -0.6738290191, -0.6704215407, -0.6669999361, -0.6635641456, -0.6601143479, -0.6566505432, -0.6531728506, -0.6496813297, -0.6461760402, -0.6426570415, -0.6391244531, 
-0.6355783343, -0.6320187449, -0.6284457445, -0.6248595119, -0.6212599874, -0.6176472902, -0.6140215397, -0.6103827953, -0.6067311168, -0.6030666232, -0.5993893147, -0.5956993103, -0.5919966698, -0.5882815719, -0.584553957, -0.5808139443, -0.5770616531, -0.573297143, -0.5695205331, -0.5657318234, -0.5619311333, -0.5581185222, -0.5542941093, -0.5504579544, -0.5466101766, -0.5427507758, -0.538879931, -0.534997642, -0.5311040282, -0.5271991491, -0.523283124, -0.5193560123, -0.5154178739, -0.5114688277, -0.5075089931, -0.5035383701, -0.4995571077, -0.4955652654, -0.4915629029, -0.4875501692, -0.4835270643, -0.479493767, -0.4754502773, -0.4713967443, -0.4673331976, -0.4632597864, -0.4591765404, -0.4550835788, -0.4509809911, -0.4468688369, -0.4427472353, -0.438616246, -0.4344759583, -0.4303264916, -0.4261678755, -0.4220002592, -0.4178237021, -0.4136383235, -0.4094441533, -0.4052413106, -0.4010298848, -0.3968099952, -0.3925816715, -0.3883450329, -0.3841001987, -0.3798471987, -0.3755861819, -0.3713172078, -0.3670403361, -0.3627557158, -0.3584634066, -0.3541635275, -0.3498561382, -0.3455413282, -0.3412192166, -0.336889863, -0.3325533569, -0.3282098472, -0.3238593638, -0.3195020258, -0.3151379228, -0.310767144, -0.3063898087, -0.3020059466, -0.2976157069, -0.2932191491, -0.2888164222, -0.2844075263, -0.27999264, -0.2755718231, -0.271145165, -0.266712755, -0.2622747123, -0.2578310966, -0.2533820271, -0.2489276081, -0.2444678992, -0.2400030196, -0.2355330586, -0.2310581058, -0.2265782654, -0.2220936269, -0.2176042795, -0.2131103128, -0.208611846, -0.2041089684, -0.1996017545, -0.1950903237, -0.1905747503, -0.1860551536, -0.1815316081, -0.1770042181, -0.1724730879, -0.167938292, -0.1633999497, -0.1588581502, -0.1543129683, -0.1497645378, -0.1452129185, -0.1406582445, -0.1361005753, -0.1315400302, -0.1269766986, -0.1224106774, -0.1178420633, -0.1132709533, -0.1086974442, -0.1041216329, -0.09954361618, -0.09496349841, -0.09038136154, -0.08579730988, -0.08121144772, 
-0.07662386447, -0.07203464955, -0.06744392216, -0.06285175681, -0.05825826526, -0.05366353691, -0.04906767607, -0.04447077215, -0.03987292573, -0.03527423739, -0.030674804, -0.02607471868, -0.02147408016, -0.01687298715, -0.01227153838, -0.007669828832, -0.003067956772, 0.001533980132, 0.006135884672, 0.01073765941, 0.01533920597, 0.01994042844, 0.02454122901, 0.02914150804, 0.0337411724, 0.03834012151, 0.0429382585, 0.04753548279, 0.05213170499, 0.05672682077, 0.061320737, 0.06591334939, 0.07050457597, 0.07509429753, 0.07968243957, 0.08426889032, 0.08885355294, 0.09343633801, 0.09801714122, 0.1025958657, 0.1071724221, 0.1117467135, 0.1163186282, 0.1208880842, 0.1254549772, 0.1300192177, 0.1345807016, 0.1391393393, 0.1436950266, 0.1482476741, 0.1527971923, 0.1573434621, 0.161886394, 0.1664258987, 0.1709618866, 0.1754942536, 0.1800228953, 0.1845477372, 0.1890686601, 0.1935855895, 0.1980984062, 0.2026070356, 0.2071113735, 0.2116113305, 0.2161068022, 0.2205976844, 0.2250839174, 0.2295653671, 0.234041959, 0.2385135889, 0.2429801822, 0.2474416196, 0.2518978119, 0.2563486695, 0.2607941031, 0.2652340233, 0.2696683109, 0.2740969062, 0.27851969, 0.282936573, 0.2873474658, 0.291752249, 0.296150893, 0.3005432487, 0.3049292266, 0.3093087673, 0.3136817515, 0.3180480897, 0.3224076927, 0.3267604411, 0.3311063051, 0.3354451358, 0.3397768736, 0.344101429, 0.3484186828, 0.3527285457, 0.3570309579, 0.3613258004, 0.3656129837, 0.3698924482, 0.3741640747, 0.3784277439, 0.3826834261, 0.3869310021, 0.3911703825, 0.3954014778, 0.3996241987, 0.4038384557, 0.4080441594, 0.4122412205, 0.4164295495, 0.4206090868, 0.4247796834, 0.4289412796, 0.433093816, 0.4372371733, 0.4413712621, 0.4454960227, 0.449611336, 0.4537171125, 0.4578132927, 0.4618997872, 0.4659765065, 0.4700433314, 0.4741002023, 0.4781470597, 0.4821837842, 0.4862102866, 0.4902264774, 0.4942322969, 0.4982276559, 0.5022124648, 0.5061866641, 0.510150075, 0.514102757, 0.5180445313, 0.5219752789, 0.5258949995, 0.5298036337, 
0.5337010026, 0.5375870466, 0.5414617658, 0.5453249812, 0.5491766334, 0.5530167222, 0.5568450093, 0.5606615543, 0.564466238, 0.5682589412, 0.5720396042, 0.5758081675, 0.5795645714, 0.5833086371, 0.5870403647, 0.5907596946, 0.5944665074, 0.5981606841, 0.6018422246, 0.6055110693, 0.6091670394, 0.6128100753, 0.616440177, 0.6200572252, 0.6236611009, 0.6272518039, 0.630829215, 0.6343932748, 0.6379439235, 0.6414810419, 0.6450045109, 0.64851439, 0.65201056, 0.6554928422, 0.6589612961, 0.6624158025, 0.6658562422, 0.6692826152, 0.6726947427, 0.6760926843, 0.6794763207, 0.6828455329, 0.6862003207, 0.689540565, 0.6928661466, 0.696177125, 0.6994733214, 0.7027547359, 0.7060212493, 0.7092728019, 0.7125093937, 0.7157308459, 0.718937099, 0.7221282125, 0.7253039479, 0.728464365, 0.7316094041, 0.7347388864, 0.7378528118, 0.7409511209, 0.7440337539, 0.7471005917, 0.7501516342, 0.7531868219, 0.756205976, 0.7592092156, 0.7621963024, 0.7651672363, 0.7681220174, 0.7710605264, 0.7739827037, 0.7768884897, 0.7797777653, 0.7826505899, 0.7855068445, 0.7883464098, 0.7911693454, 0.7939754725, 0.796764791, 0.7995372415, 0.8022928238, 0.8050313592, 0.8077528477, 0.81045717, 0.8131443858, 0.8158144355, 0.8184671402, 0.8211025, 0.8237205148, 0.8263210654, 0.8289040923, 0.8314695954, 0.8340175152, 0.8365477324, 0.8390602469, 0.8415549994, 0.8440318704, 0.8464909196, 0.8489320278, 0.851355195, 0.8537603021, 0.8561473489, 0.8585162163, 0.8608669639, 0.8631994128, 0.8655136228, 0.8678094745, 0.8700869679, 0.8723460436, 0.8745866418, 0.8768087029, 0.8790122271, 0.8811970949, 0.8833633661, 0.8855108619, 0.8876396418, 0.8897495866, 0.8918406963, 0.893912971, 0.8959662318, 0.898000598, 0.9000158906, 0.9020121694, 0.903989315, 0.905947268, 0.9078860879, 0.9098057151, 0.9117060304, 0.9135870337, 0.9154487252, 0.9172909856, 0.9191138744, 0.920917213, 0.9227011204, 0.9244654775, 0.9262102246, 0.9279354215, 0.9296408892, 0.9313266873, 0.932992816, 0.9346391559, 0.9362656474, 0.9378723502, 0.9394592047, 
0.9410261512, 0.9425731897, 0.9441002607, 0.9456073046, 0.9470943809, 0.9485613704, 0.9500082731, 0.9514350295, 0.9528416395, 0.9542281032, 0.9555943608, 0.9569403529, 0.9582660794, 0.9595715404, 0.9608566165, 0.9621214271, 0.9633657932, 0.9645897746, 0.9657933712, 0.9669764638, 0.968139112, 0.9692812562, 0.9704028368, 0.9715039134, 0.9725843668, 0.9736442566, 0.9746835232, 0.975702107, 0.9767000675, 0.9776773453, 0.9786339402, 0.9795697927, 0.9804848433, 0.9813792109, 0.982252717, 0.9831054807, 0.9839374423, 0.9847484827, 0.9855387211, 0.9863080978, 0.9870565534, 0.9877841473, 0.9884908199, 0.9891765118, 0.9898412824, 0.9904850721, 0.9911079407, 0.9917097688, 0.992290616, 0.9928504229, 0.9933891892, 0.9939069748, 0.9944036603, 0.9948793054, 0.99533391, 0.9957674146, 0.9961798191, 0.9965711236, 0.996941328, 0.9972904325, 0.9976184368, 0.9979252815, 0.9982110262, 0.9984755516, 0.9987190366, 0.9989413023, 0.9991424084, 0.9993223548, 0.9994812012, 0.9996188283, 0.9997352958, 0.9998306036, 0.9999046922, 0.9999576211, 0.9999893904}; -constant float ts10[512] = {-0, -0.01227153838, -0.02454122901, -0.03680722415, -0.04906767607, -0.061320737, -0.07356456667, -0.08579730988, -0.09801714122, -0.1102222055, -0.1224106774, -0.1345807016, -0.1467304677, -0.1588581502, -0.1709618866, -0.1830398887, -0.1950903237, -0.2071113735, -0.2191012353, -0.2310581058, -0.2429801822, -0.2548656464, -0.266712755, -0.27851969, -0.2902846634, -0.3020059466, -0.3136817515, -0.3253102899, -0.336889863, -0.3484186828, -0.3598950505, -0.3713172078, -0.3826834261, -0.3939920366, -0.4052413106, -0.4164295495, -0.4275550842, -0.438616246, -0.449611336, -0.4605387151, -0.4713967443, -0.4821837842, -0.492898196, -0.5035383701, -0.514102757, -0.5245896578, -0.534997642, -0.5453249812, -0.5555702448, -0.5657318234, -0.5758081675, -0.5857978463, -0.5956993103, -0.6055110693, -0.6152315736, -0.6248595119, -0.6343932748, -0.6438315511, -0.6531728506, -0.6624158025, -0.6715589762, -0.6806010008, 
-0.689540565, -0.6983762383, -0.7071067691, -0.7157308459, -0.724247098, -0.7326542735, -0.7409511209, -0.7491363883, -0.7572088242, -0.7651672363, -0.7730104327, -0.7807372212, -0.7883464098, -0.7958369255, -0.8032075167, -0.81045717, -0.8175848126, -0.8245893121, -0.8314695954, -0.838224709, -0.84485358, -0.851355195, -0.8577286005, -0.8639728427, -0.8700869679, -0.8760700822, -0.8819212914, -0.8876396418, -0.893224299, -0.8986744881, -0.903989315, -0.909168005, -0.9142097831, -0.9191138744, -0.9238795042, -0.9285060763, -0.932992816, -0.9373390079, -0.9415440559, -0.9456073046, -0.9495281577, -0.9533060193, -0.9569403529, -0.9604305029, -0.963776052, -0.9669764638, -0.9700312614, -0.9729399681, -0.975702107, -0.97831738, -0.9807852507, -0.9831054807, -0.9852776527, -0.9873014092, -0.9891765118, -0.9909026623, -0.9924795628, -0.9939069748, -0.9951847196, -0.9963126183, -0.9972904325, -0.9981181026, -0.9987954497, -0.9993223548, -0.9996988177, -0.9999247193, -1, -0.9999247193, -0.9996988177, -0.9993223548, -0.9987954497, -0.9981181026, -0.9972904325, -0.9963126183, -0.9951847196, -0.9939069748, -0.9924795628, -0.9909026623, -0.9891765118, -0.9873014092, -0.9852776527, -0.9831054807, -0.9807852507, -0.97831738, -0.975702107, -0.9729399681, -0.9700312614, -0.9669764638, -0.963776052, -0.9604305029, -0.9569403529, -0.9533060193, -0.9495281577, -0.9456073046, -0.9415440559, -0.9373390079, -0.932992816, -0.9285060763, -0.9238795042, -0.9191138744, -0.9142097831, -0.909168005, -0.903989315, -0.8986744881, -0.893224299, -0.8876396418, -0.8819212914, -0.8760700822, -0.8700869679, -0.8639728427, -0.8577286005, -0.851355195, -0.84485358, -0.838224709, -0.8314695954, -0.8245893121, -0.8175848126, -0.81045717, -0.8032075167, -0.7958369255, -0.7883464098, -0.7807372212, -0.7730104327, -0.7651672363, -0.7572088242, -0.7491363883, -0.7409511209, -0.7326542735, -0.724247098, -0.7157308459, -0.7071067691, -0.6983762383, -0.689540565, -0.6806010008, -0.6715589762, -0.6624158025, 
-0.6531728506, -0.6438315511, -0.6343932748, -0.6248595119, -0.6152315736, -0.6055110693, -0.5956993103, -0.5857978463, -0.5758081675, -0.5657318234, -0.5555702448, -0.5453249812, -0.534997642, -0.5245896578, -0.514102757, -0.5035383701, -0.492898196, -0.4821837842, -0.4713967443, -0.4605387151, -0.449611336, -0.438616246, -0.4275550842, -0.4164295495, -0.4052413106, -0.3939920366, -0.3826834261, -0.3713172078, -0.3598950505, -0.3484186828, -0.336889863, -0.3253102899, -0.3136817515, -0.3020059466, -0.2902846634, -0.27851969, -0.266712755, -0.2548656464, -0.2429801822, -0.2310581058, -0.2191012353, -0.2071113735, -0.1950903237, -0.1830398887, -0.1709618866, -0.1588581502, -0.1467304677, -0.1345807016, -0.1224106774, -0.1102222055, -0.09801714122, -0.08579730988, -0.07356456667, -0.061320737, -0.04906767607, -0.03680722415, -0.02454122901, -0.01227153838, -0, -0.01227153838, -0.02454122901, -0.03680722415, -0.04906767607, -0.061320737, -0.07356456667, -0.08579730988, -0.09801714122, -0.1102222055, -0.1224106774, -0.1345807016, -0.1467304677, -0.1588581502, -0.1709618866, -0.1830398887, -0.1950903237, -0.2071113735, -0.2191012353, -0.2310581058, -0.2429801822, -0.2548656464, -0.266712755, -0.27851969, -0.2902846634, -0.3020059466, -0.3136817515, -0.3253102899, -0.336889863, -0.3484186828, -0.3598950505, -0.3713172078, -0.3826834261, -0.3939920366, -0.4052413106, -0.4164295495, -0.4275550842, -0.438616246, -0.449611336, -0.4605387151, -0.4713967443, -0.4821837842, -0.492898196, -0.5035383701, -0.514102757, -0.5245896578, -0.534997642, -0.5453249812, -0.5555702448, -0.5657318234, -0.5758081675, -0.5857978463, -0.5956993103, -0.6055110693, -0.6152315736, -0.6248595119, -0.6343932748, -0.6438315511, -0.6531728506, -0.6624158025, -0.6715589762, -0.6806010008, -0.689540565, -0.6983762383, -0.7071067691, -0.7157308459, -0.724247098, -0.7326542735, -0.7409511209, -0.7491363883, -0.7572088242, -0.7651672363, -0.7730104327, -0.7807372212, -0.7883464098, -0.7958369255, 
-0.8032075167, -0.81045717, -0.8175848126, -0.8245893121, -0.8314695954, -0.838224709, -0.84485358, -0.851355195, -0.8577286005, -0.8639728427, -0.8700869679, -0.8760700822, -0.8819212914, -0.8876396418, -0.893224299, -0.8986744881, -0.903989315, -0.909168005, -0.9142097831, -0.9191138744, -0.9238795042, -0.9285060763, -0.932992816, -0.9373390079, -0.9415440559, -0.9456073046, -0.9495281577, -0.9533060193, -0.9569403529, -0.9604305029, -0.963776052, -0.9669764638, -0.9700312614, -0.9729399681, -0.975702107, -0.97831738, -0.9807852507, -0.9831054807, -0.9852776527, -0.9873014092, -0.9891765118, -0.9909026623, -0.9924795628, -0.9939069748, -0.9951847196, -0.9963126183, -0.9972904325, -0.9981181026, -0.9987954497, -0.9993223548, -0.9996988177, -0.9999247193, -1, -0.9999247193, -0.9996988177, -0.9993223548, -0.9987954497, -0.9981181026, -0.9972904325, -0.9963126183, -0.9951847196, -0.9939069748, -0.9924795628, -0.9909026623, -0.9891765118, -0.9873014092, -0.9852776527, -0.9831054807, -0.9807852507, -0.97831738, -0.975702107, -0.9729399681, -0.9700312614, -0.9669764638, -0.963776052, -0.9604305029, -0.9569403529, -0.9533060193, -0.9495281577, -0.9456073046, -0.9415440559, -0.9373390079, -0.932992816, -0.9285060763, -0.9238795042, -0.9191138744, -0.9142097831, -0.909168005, -0.903989315, -0.8986744881, -0.893224299, -0.8876396418, -0.8819212914, -0.8760700822, -0.8700869679, -0.8639728427, -0.8577286005, -0.851355195, -0.84485358, -0.838224709, -0.8314695954, -0.8245893121, -0.8175848126, -0.81045717, -0.8032075167, -0.7958369255, -0.7883464098, -0.7807372212, -0.7730104327, -0.7651672363, -0.7572088242, -0.7491363883, -0.7409511209, -0.7326542735, -0.724247098, -0.7157308459, -0.7071067691, -0.6983762383, -0.689540565, -0.6806010008, -0.6715589762, -0.6624158025, -0.6531728506, -0.6438315511, -0.6343932748, -0.6248595119, -0.6152315736, -0.6055110693, -0.5956993103, -0.5857978463, -0.5758081675, -0.5657318234, -0.5555702448, -0.5453249812, -0.534997642, -0.5245896578, 
-0.514102757, -0.5035383701, -0.492898196, -0.4821837842, -0.4713967443, -0.4605387151, -0.449611336, -0.438616246, -0.4275550842, -0.4164295495, -0.4052413106, -0.3939920366, -0.3826834261, -0.3713172078, -0.3598950505, -0.3484186828, -0.336889863, -0.3253102899, -0.3136817515, -0.3020059466, -0.2902846634, -0.27851969, -0.266712755, -0.2548656464, -0.2429801822, -0.2310581058, -0.2191012353, -0.2071113735, -0.1950903237, -0.1830398887, -0.1709618866, -0.1588581502, -0.1467304677, -0.1345807016, -0.1224106774, -0.1102222055, -0.09801714122, -0.08579730988, -0.07356456667, -0.061320737, -0.04906767607, -0.03680722415, -0.02454122901, -0.01227153838}; -constant float ts13[512] = {-0, -0.01227153838, -0.02454122901, -0.03680722415, -0.04906767607, -0.061320737, -0.07356456667, -0.08579730988, -0.09801714122, -0.1102222055, -0.1224106774, -0.1345807016, -0.1467304677, -0.1588581502, -0.1709618866, -0.1830398887, -0.1950903237, -0.2071113735, -0.2191012353, -0.2310581058, -0.2429801822, -0.2548656464, -0.266712755, -0.27851969, -0.2902846634, -0.3020059466, -0.3136817515, -0.3253102899, -0.336889863, -0.3484186828, -0.3598950505, -0.3713172078, -0.3826834261, -0.3939920366, -0.4052413106, -0.4164295495, -0.4275550842, -0.438616246, -0.449611336, -0.4605387151, -0.4713967443, -0.4821837842, -0.492898196, -0.5035383701, -0.514102757, -0.5245896578, -0.534997642, -0.5453249812, -0.5555702448, -0.5657318234, -0.5758081675, -0.5857978463, -0.5956993103, -0.6055110693, -0.6152315736, -0.6248595119, -0.6343932748, -0.6438315511, -0.6531728506, -0.6624158025, -0.6715589762, -0.6806010008, -0.689540565, -0.6983762383, -0.7071067691, -0.7157308459, -0.724247098, -0.7326542735, -0.7409511209, -0.7491363883, -0.7572088242, -0.7651672363, -0.7730104327, -0.7807372212, -0.7883464098, -0.7958369255, -0.8032075167, -0.81045717, -0.8175848126, -0.8245893121, -0.8314695954, -0.838224709, -0.84485358, -0.851355195, -0.8577286005, -0.8639728427, -0.8700869679, -0.8760700822, 
-0.8819212914, -0.8876396418, -0.893224299, -0.8986744881, -0.903989315, -0.909168005, -0.9142097831, -0.9191138744, -0.9238795042, -0.9285060763, -0.932992816, -0.9373390079, -0.9415440559, -0.9456073046, -0.9495281577, -0.9533060193, -0.9569403529, -0.9604305029, -0.963776052, -0.9669764638, -0.9700312614, -0.9729399681, -0.975702107, -0.97831738, -0.9807852507, -0.9831054807, -0.9852776527, -0.9873014092, -0.9891765118, -0.9909026623, -0.9924795628, -0.9939069748, -0.9951847196, -0.9963126183, -0.9972904325, -0.9981181026, -0.9987954497, -0.9993223548, -0.9996988177, -0.9999247193, -1, -0.9999247193, -0.9996988177, -0.9993223548, -0.9987954497, -0.9981181026, -0.9972904325, -0.9963126183, -0.9951847196, -0.9939069748, -0.9924795628, -0.9909026623, -0.9891765118, -0.9873014092, -0.9852776527, -0.9831054807, -0.9807852507, -0.97831738, -0.975702107, -0.9729399681, -0.9700312614, -0.9669764638, -0.963776052, -0.9604305029, -0.9569403529, -0.9533060193, -0.9495281577, -0.9456073046, -0.9415440559, -0.9373390079, -0.932992816, -0.9285060763, -0.9238795042, -0.9191138744, -0.9142097831, -0.909168005, -0.903989315, -0.8986744881, -0.893224299, -0.8876396418, -0.8819212914, -0.8760700822, -0.8700869679, -0.8639728427, -0.8577286005, -0.851355195, -0.84485358, -0.838224709, -0.8314695954, -0.8245893121, -0.8175848126, -0.81045717, -0.8032075167, -0.7958369255, -0.7883464098, -0.7807372212, -0.7730104327, -0.7651672363, -0.7572088242, -0.7491363883, -0.7409511209, -0.7326542735, -0.724247098, -0.7157308459, -0.7071067691, -0.6983762383, -0.689540565, -0.6806010008, -0.6715589762, -0.6624158025, -0.6531728506, -0.6438315511, -0.6343932748, -0.6248595119, -0.6152315736, -0.6055110693, -0.5956993103, -0.5857978463, -0.5758081675, -0.5657318234, -0.5555702448, -0.5453249812, -0.534997642, -0.5245896578, -0.514102757, -0.5035383701, -0.492898196, -0.4821837842, -0.4713967443, -0.4605387151, -0.449611336, -0.438616246, -0.4275550842, -0.4164295495, -0.4052413106, -0.3939920366, 
-0.3826834261, -0.3713172078, -0.3598950505, -0.3484186828, -0.336889863, -0.3253102899, -0.3136817515, -0.3020059466, -0.2902846634, -0.27851969, -0.266712755, -0.2548656464, -0.2429801822, -0.2310581058, -0.2191012353, -0.2071113735, -0.1950903237, -0.1830398887, -0.1709618866, -0.1588581502, -0.1467304677, -0.1345807016, -0.1224106774, -0.1102222055, -0.09801714122, -0.08579730988, -0.07356456667, -0.061320737, -0.04906767607, -0.03680722415, -0.02454122901, -0.01227153838, -0, -0.01227153838, -0.02454122901, -0.03680722415, -0.04906767607, -0.061320737, -0.07356456667, -0.08579730988, -0.09801714122, -0.1102222055, -0.1224106774, -0.1345807016, -0.1467304677, -0.1588581502, -0.1709618866, -0.1830398887, -0.1950903237, -0.2071113735, -0.2191012353, -0.2310581058, -0.2429801822, -0.2548656464, -0.266712755, -0.27851969, -0.2902846634, -0.3020059466, -0.3136817515, -0.3253102899, -0.336889863, -0.3484186828, -0.3598950505, -0.3713172078, -0.3826834261, -0.3939920366, -0.4052413106, -0.4164295495, -0.4275550842, -0.438616246, -0.449611336, -0.4605387151, -0.4713967443, -0.4821837842, -0.492898196, -0.5035383701, -0.514102757, -0.5245896578, -0.534997642, -0.5453249812, -0.5555702448, -0.5657318234, -0.5758081675, -0.5857978463, -0.5956993103, -0.6055110693, -0.6152315736, -0.6248595119, -0.6343932748, -0.6438315511, -0.6531728506, -0.6624158025, -0.6715589762, -0.6806010008, -0.689540565, -0.6983762383, -0.7071067691, -0.7157308459, -0.724247098, -0.7326542735, -0.7409511209, -0.7491363883, -0.7572088242, -0.7651672363, -0.7730104327, -0.7807372212, -0.7883464098, -0.7958369255, -0.8032075167, -0.81045717, -0.8175848126, -0.8245893121, -0.8314695954, -0.838224709, -0.84485358, -0.851355195, -0.8577286005, -0.8639728427, -0.8700869679, -0.8760700822, -0.8819212914, -0.8876396418, -0.893224299, -0.8986744881, -0.903989315, -0.909168005, -0.9142097831, -0.9191138744, -0.9238795042, -0.9285060763, -0.932992816, -0.9373390079, -0.9415440559, -0.9456073046, 
-0.9495281577, -0.9533060193, -0.9569403529, -0.9604305029, -0.963776052, -0.9669764638, -0.9700312614, -0.9729399681, -0.975702107, -0.97831738, -0.9807852507, -0.9831054807, -0.9852776527, -0.9873014092, -0.9891765118, -0.9909026623, -0.9924795628, -0.9939069748, -0.9951847196, -0.9963126183, -0.9972904325, -0.9981181026, -0.9987954497, -0.9993223548, -0.9996988177, -0.9999247193, -1, -0.9999247193, -0.9996988177, -0.9993223548, -0.9987954497, -0.9981181026, -0.9972904325, -0.9963126183, -0.9951847196, -0.9939069748, -0.9924795628, -0.9909026623, -0.9891765118, -0.9873014092, -0.9852776527, -0.9831054807, -0.9807852507, -0.97831738, -0.975702107, -0.9729399681, -0.9700312614, -0.9669764638, -0.963776052, -0.9604305029, -0.9569403529, -0.9533060193, -0.9495281577, -0.9456073046, -0.9415440559, -0.9373390079, -0.932992816, -0.9285060763, -0.9238795042, -0.9191138744, -0.9142097831, -0.909168005, -0.903989315, -0.8986744881, -0.893224299, -0.8876396418, -0.8819212914, -0.8760700822, -0.8700869679, -0.8639728427, -0.8577286005, -0.851355195, -0.84485358, -0.838224709, -0.8314695954, -0.8245893121, -0.8175848126, -0.81045717, -0.8032075167, -0.7958369255, -0.7883464098, -0.7807372212, -0.7730104327, -0.7651672363, -0.7572088242, -0.7491363883, -0.7409511209, -0.7326542735, -0.724247098, -0.7157308459, -0.7071067691, -0.6983762383, -0.689540565, -0.6806010008, -0.6715589762, -0.6624158025, -0.6531728506, -0.6438315511, -0.6343932748, -0.6248595119, -0.6152315736, -0.6055110693, -0.5956993103, -0.5857978463, -0.5758081675, -0.5657318234, -0.5555702448, -0.5453249812, -0.534997642, -0.5245896578, -0.514102757, -0.5035383701, -0.492898196, -0.4821837842, -0.4713967443, -0.4605387151, -0.449611336, -0.438616246, -0.4275550842, -0.4164295495, -0.4052413106, -0.3939920366, -0.3826834261, -0.3713172078, -0.3598950505, -0.3484186828, -0.336889863, -0.3253102899, -0.3136817515, -0.3020059466, -0.2902846634, -0.27851969, -0.266712755, -0.2548656464, -0.2429801822, -0.2310581058, 
-0.2191012353, -0.2071113735, -0.1950903237, -0.1830398887, -0.1709618866, -0.1588581502, -0.1467304677, -0.1345807016, -0.1224106774, -0.1102222055, -0.09801714122, -0.08579730988, -0.07356456667, -0.061320737, -0.04906767607, -0.03680722415, -0.02454122901, -0.01227153838}; -constant float ts11[512] = {-0, -0.006135884672, -0.01227153838, -0.01840673015, -0.02454122901, -0.030674804, -0.03680722415, -0.0429382585, -0.04906767607, -0.05519524589, -0.061320737, -0.06744392216, -0.07356456667, -0.07968243957, -0.08579730988, -0.09190895408, -0.09801714122, -0.1041216329, -0.1102222055, -0.1163186282, -0.1224106774, -0.1284981072, -0.1345807016, -0.1406582445, -0.1467304677, -0.1527971923, -0.1588581502, -0.1649131179, -0.1709618866, -0.1770042181, -0.1830398887, -0.1890686601, -0.1950903237, -0.201104641, -0.2071113735, -0.2131103128, -0.2191012353, -0.2250839174, -0.2310581058, -0.2370236069, -0.2429801822, -0.2489276081, -0.2548656464, -0.2607941031, -0.266712755, -0.2726213634, -0.27851969, -0.2844075263, -0.2902846634, -0.296150893, -0.3020059466, -0.3078496456, -0.3136817515, -0.3195020258, -0.3253102899, -0.3311063051, -0.336889863, -0.3426607251, -0.3484186828, -0.3541635275, -0.3598950505, -0.3656129837, -0.3713172078, -0.3770074248, -0.3826834261, -0.3883450329, -0.3939920366, -0.3996241987, -0.4052413106, -0.4108431637, -0.4164295495, -0.4220002592, -0.4275550842, -0.433093816, -0.438616246, -0.4441221356, -0.449611336, -0.4550835788, -0.4605387151, -0.4659765065, -0.4713967443, -0.4767992198, -0.4821837842, -0.4875501692, -0.492898196, -0.4982276559, -0.5035383701, -0.5088301301, -0.514102757, -0.5193560123, -0.5245896578, -0.5298036337, -0.534997642, -0.5401714444, -0.5453249812, -0.5504579544, -0.5555702448, -0.5606615543, -0.5657318234, -0.5707807541, -0.5758081675, -0.5808139443, -0.5857978463, -0.5907596946, -0.5956993103, -0.6006164551, -0.6055110693, -0.6103827953, -0.6152315736, -0.6200572252, -0.6248595119, -0.6296382546, -0.6343932748, 
-0.6391244531, -0.6438315511, -0.64851439, -0.6531728506, -0.6578066945, -0.6624158025, -0.6669999361, -0.6715589762, -0.6760926843, -0.6806010008, -0.6850836873, -0.689540565, -0.6939714551, -0.6983762383, -0.7027547359, -0.7071067691, -0.7114322186, -0.7157308459, -0.720002532, -0.724247098, -0.728464365, -0.7326542735, -0.7368165851, -0.7409511209, -0.7450577617, -0.7491363883, -0.7531868219, -0.7572088242, -0.761202395, -0.7651672363, -0.7691033483, -0.7730104327, -0.7768884897, -0.7807372212, -0.7845565677, -0.7883464098, -0.7921065688, -0.7958369255, -0.7995372415, -0.8032075167, -0.8068475723, -0.81045717, -0.8140363097, -0.8175848126, -0.8211025, -0.8245893121, -0.8280450702, -0.8314695954, -0.8348628879, -0.838224709, -0.8415549994, -0.84485358, -0.8481203318, -0.851355195, -0.854557991, -0.8577286005, -0.8608669639, -0.8639728427, -0.867046237, -0.8700869679, -0.8730949759, -0.8760700822, -0.8790122271, -0.8819212914, -0.8847970963, -0.8876396418, -0.8904487491, -0.893224299, -0.8959662318, -0.8986744881, -0.9013488293, -0.903989315, -0.9065957069, -0.909168005, -0.9117060304, -0.9142097831, -0.9166790843, -0.9191138744, -0.9215140343, -0.9238795042, -0.9262102246, -0.9285060763, -0.9307669401, -0.932992816, -0.9351835251, -0.9373390079, -0.9394592047, -0.9415440559, -0.9435934424, -0.9456073046, -0.9475855827, -0.9495281577, -0.9514350295, -0.9533060193, -0.9551411867, -0.9569403529, -0.9587034583, -0.9604305029, -0.9621214271, -0.963776052, -0.9653944373, -0.9669764638, -0.9685220718, -0.9700312614, -0.9715039134, -0.9729399681, -0.974339366, -0.975702107, -0.9770281315, -0.97831738, -0.9795697927, -0.9807852507, -0.9819638729, -0.9831054807, -0.9842100739, -0.9852776527, -0.9863080978, -0.9873014092, -0.988257587, -0.9891765118, -0.9900581837, -0.9909026623, -0.9917097688, -0.9924795628, -0.993211925, -0.9939069748, -0.9945645928, -0.9951847196, -0.9957674146, -0.9963126183, -0.996820271, -0.9972904325, -0.997723043, -0.9981181026, -0.9984755516, 
-0.9987954497, -0.9990777373, -0.9993223548, -0.9995294213, -0.9996988177, -0.9998306036, -0.9999247193, -0.9999811649, -0, -0.006135884672, -0.01227153838, -0.01840673015, -0.02454122901, -0.030674804, -0.03680722415, -0.0429382585, -0.04906767607, -0.05519524589, -0.061320737, -0.06744392216, -0.07356456667, -0.07968243957, -0.08579730988, -0.09190895408, -0.09801714122, -0.1041216329, -0.1102222055, -0.1163186282, -0.1224106774, -0.1284981072, -0.1345807016, -0.1406582445, -0.1467304677, -0.1527971923, -0.1588581502, -0.1649131179, -0.1709618866, -0.1770042181, -0.1830398887, -0.1890686601, -0.1950903237, -0.201104641, -0.2071113735, -0.2131103128, -0.2191012353, -0.2250839174, -0.2310581058, -0.2370236069, -0.2429801822, -0.2489276081, -0.2548656464, -0.2607941031, -0.266712755, -0.2726213634, -0.27851969, -0.2844075263, -0.2902846634, -0.296150893, -0.3020059466, -0.3078496456, -0.3136817515, -0.3195020258, -0.3253102899, -0.3311063051, -0.336889863, -0.3426607251, -0.3484186828, -0.3541635275, -0.3598950505, -0.3656129837, -0.3713172078, -0.3770074248, -0.3826834261, -0.3883450329, -0.3939920366, -0.3996241987, -0.4052413106, -0.4108431637, -0.4164295495, -0.4220002592, -0.4275550842, -0.433093816, -0.438616246, -0.4441221356, -0.449611336, -0.4550835788, -0.4605387151, -0.4659765065, -0.4713967443, -0.4767992198, -0.4821837842, -0.4875501692, -0.492898196, -0.4982276559, -0.5035383701, -0.5088301301, -0.514102757, -0.5193560123, -0.5245896578, -0.5298036337, -0.534997642, -0.5401714444, -0.5453249812, -0.5504579544, -0.5555702448, -0.5606615543, -0.5657318234, -0.5707807541, -0.5758081675, -0.5808139443, -0.5857978463, -0.5907596946, -0.5956993103, -0.6006164551, -0.6055110693, -0.6103827953, -0.6152315736, -0.6200572252, -0.6248595119, -0.6296382546, -0.6343932748, -0.6391244531, -0.6438315511, -0.64851439, -0.6531728506, -0.6578066945, -0.6624158025, -0.6669999361, -0.6715589762, -0.6760926843, -0.6806010008, -0.6850836873, -0.689540565, -0.6939714551, 
-0.6983762383, -0.7027547359, -0.7071067691, -0.7114322186, -0.7157308459, -0.720002532, -0.724247098, -0.728464365, -0.7326542735, -0.7368165851, -0.7409511209, -0.7450577617, -0.7491363883, -0.7531868219, -0.7572088242, -0.761202395, -0.7651672363, -0.7691033483, -0.7730104327, -0.7768884897, -0.7807372212, -0.7845565677, -0.7883464098, -0.7921065688, -0.7958369255, -0.7995372415, -0.8032075167, -0.8068475723, -0.81045717, -0.8140363097, -0.8175848126, -0.8211025, -0.8245893121, -0.8280450702, -0.8314695954, -0.8348628879, -0.838224709, -0.8415549994, -0.84485358, -0.8481203318, -0.851355195, -0.854557991, -0.8577286005, -0.8608669639, -0.8639728427, -0.867046237, -0.8700869679, -0.8730949759, -0.8760700822, -0.8790122271, -0.8819212914, -0.8847970963, -0.8876396418, -0.8904487491, -0.893224299, -0.8959662318, -0.8986744881, -0.9013488293, -0.903989315, -0.9065957069, -0.909168005, -0.9117060304, -0.9142097831, -0.9166790843, -0.9191138744, -0.9215140343, -0.9238795042, -0.9262102246, -0.9285060763, -0.9307669401, -0.932992816, -0.9351835251, -0.9373390079, -0.9394592047, -0.9415440559, -0.9435934424, -0.9456073046, -0.9475855827, -0.9495281577, -0.9514350295, -0.9533060193, -0.9551411867, -0.9569403529, -0.9587034583, -0.9604305029, -0.9621214271, -0.963776052, -0.9653944373, -0.9669764638, -0.9685220718, -0.9700312614, -0.9715039134, -0.9729399681, -0.974339366, -0.975702107, -0.9770281315, -0.97831738, -0.9795697927, -0.9807852507, -0.9819638729, -0.9831054807, -0.9842100739, -0.9852776527, -0.9863080978, -0.9873014092, -0.988257587, -0.9891765118, -0.9900581837, -0.9909026623, -0.9917097688, -0.9924795628, -0.993211925, -0.9939069748, -0.9945645928, -0.9951847196, -0.9957674146, -0.9963126183, -0.996820271, -0.9972904325, -0.997723043, -0.9981181026, -0.9984755516, -0.9987954497, -0.9990777373, -0.9993223548, -0.9995294213, -0.9996988177, -0.9998306036, -0.9999247193, -0.9999811649}; -constant float ts14[512] = {-0, -0.006135884672, -0.01227153838, 
-0.01840673015, -0.02454122901, -0.030674804, -0.03680722415, -0.0429382585, -0.04906767607, -0.05519524589, -0.061320737, -0.06744392216, -0.07356456667, -0.07968243957, -0.08579730988, -0.09190895408, -0.09801714122, -0.1041216329, -0.1102222055, -0.1163186282, -0.1224106774, -0.1284981072, -0.1345807016, -0.1406582445, -0.1467304677, -0.1527971923, -0.1588581502, -0.1649131179, -0.1709618866, -0.1770042181, -0.1830398887, -0.1890686601, -0.1950903237, -0.201104641, -0.2071113735, -0.2131103128, -0.2191012353, -0.2250839174, -0.2310581058, -0.2370236069, -0.2429801822, -0.2489276081, -0.2548656464, -0.2607941031, -0.266712755, -0.2726213634, -0.27851969, -0.2844075263, -0.2902846634, -0.296150893, -0.3020059466, -0.3078496456, -0.3136817515, -0.3195020258, -0.3253102899, -0.3311063051, -0.336889863, -0.3426607251, -0.3484186828, -0.3541635275, -0.3598950505, -0.3656129837, -0.3713172078, -0.3770074248, -0.3826834261, -0.3883450329, -0.3939920366, -0.3996241987, -0.4052413106, -0.4108431637, -0.4164295495, -0.4220002592, -0.4275550842, -0.433093816, -0.438616246, -0.4441221356, -0.449611336, -0.4550835788, -0.4605387151, -0.4659765065, -0.4713967443, -0.4767992198, -0.4821837842, -0.4875501692, -0.492898196, -0.4982276559, -0.5035383701, -0.5088301301, -0.514102757, -0.5193560123, -0.5245896578, -0.5298036337, -0.534997642, -0.5401714444, -0.5453249812, -0.5504579544, -0.5555702448, -0.5606615543, -0.5657318234, -0.5707807541, -0.5758081675, -0.5808139443, -0.5857978463, -0.5907596946, -0.5956993103, -0.6006164551, -0.6055110693, -0.6103827953, -0.6152315736, -0.6200572252, -0.6248595119, -0.6296382546, -0.6343932748, -0.6391244531, -0.6438315511, -0.64851439, -0.6531728506, -0.6578066945, -0.6624158025, -0.6669999361, -0.6715589762, -0.6760926843, -0.6806010008, -0.6850836873, -0.689540565, -0.6939714551, -0.6983762383, -0.7027547359, -0.7071067691, -0.7114322186, -0.7157308459, -0.720002532, -0.724247098, -0.728464365, -0.7326542735, -0.7368165851, 
-0.7409511209, -0.7450577617, -0.7491363883, -0.7531868219, -0.7572088242, -0.761202395, -0.7651672363, -0.7691033483, -0.7730104327, -0.7768884897, -0.7807372212, -0.7845565677, -0.7883464098, -0.7921065688, -0.7958369255, -0.7995372415, -0.8032075167, -0.8068475723, -0.81045717, -0.8140363097, -0.8175848126, -0.8211025, -0.8245893121, -0.8280450702, -0.8314695954, -0.8348628879, -0.838224709, -0.8415549994, -0.84485358, -0.8481203318, -0.851355195, -0.854557991, -0.8577286005, -0.8608669639, -0.8639728427, -0.867046237, -0.8700869679, -0.8730949759, -0.8760700822, -0.8790122271, -0.8819212914, -0.8847970963, -0.8876396418, -0.8904487491, -0.893224299, -0.8959662318, -0.8986744881, -0.9013488293, -0.903989315, -0.9065957069, -0.909168005, -0.9117060304, -0.9142097831, -0.9166790843, -0.9191138744, -0.9215140343, -0.9238795042, -0.9262102246, -0.9285060763, -0.9307669401, -0.932992816, -0.9351835251, -0.9373390079, -0.9394592047, -0.9415440559, -0.9435934424, -0.9456073046, -0.9475855827, -0.9495281577, -0.9514350295, -0.9533060193, -0.9551411867, -0.9569403529, -0.9587034583, -0.9604305029, -0.9621214271, -0.963776052, -0.9653944373, -0.9669764638, -0.9685220718, -0.9700312614, -0.9715039134, -0.9729399681, -0.974339366, -0.975702107, -0.9770281315, -0.97831738, -0.9795697927, -0.9807852507, -0.9819638729, -0.9831054807, -0.9842100739, -0.9852776527, -0.9863080978, -0.9873014092, -0.988257587, -0.9891765118, -0.9900581837, -0.9909026623, -0.9917097688, -0.9924795628, -0.993211925, -0.9939069748, -0.9945645928, -0.9951847196, -0.9957674146, -0.9963126183, -0.996820271, -0.9972904325, -0.997723043, -0.9981181026, -0.9984755516, -0.9987954497, -0.9990777373, -0.9993223548, -0.9995294213, -0.9996988177, -0.9998306036, -0.9999247193, -0.9999811649, -0, -0.006135884672, -0.01227153838, -0.01840673015, -0.02454122901, -0.030674804, -0.03680722415, -0.0429382585, -0.04906767607, -0.05519524589, -0.061320737, -0.06744392216, -0.07356456667, -0.07968243957, -0.08579730988, 
-0.09190895408, -0.09801714122, -0.1041216329, -0.1102222055, -0.1163186282, -0.1224106774, -0.1284981072, -0.1345807016, -0.1406582445, -0.1467304677, -0.1527971923, -0.1588581502, -0.1649131179, -0.1709618866, -0.1770042181, -0.1830398887, -0.1890686601, -0.1950903237, -0.201104641, -0.2071113735, -0.2131103128, -0.2191012353, -0.2250839174, -0.2310581058, -0.2370236069, -0.2429801822, -0.2489276081, -0.2548656464, -0.2607941031, -0.266712755, -0.2726213634, -0.27851969, -0.2844075263, -0.2902846634, -0.296150893, -0.3020059466, -0.3078496456, -0.3136817515, -0.3195020258, -0.3253102899, -0.3311063051, -0.336889863, -0.3426607251, -0.3484186828, -0.3541635275, -0.3598950505, -0.3656129837, -0.3713172078, -0.3770074248, -0.3826834261, -0.3883450329, -0.3939920366, -0.3996241987, -0.4052413106, -0.4108431637, -0.4164295495, -0.4220002592, -0.4275550842, -0.433093816, -0.438616246, -0.4441221356, -0.449611336, -0.4550835788, -0.4605387151, -0.4659765065, -0.4713967443, -0.4767992198, -0.4821837842, -0.4875501692, -0.492898196, -0.4982276559, -0.5035383701, -0.5088301301, -0.514102757, -0.5193560123, -0.5245896578, -0.5298036337, -0.534997642, -0.5401714444, -0.5453249812, -0.5504579544, -0.5555702448, -0.5606615543, -0.5657318234, -0.5707807541, -0.5758081675, -0.5808139443, -0.5857978463, -0.5907596946, -0.5956993103, -0.6006164551, -0.6055110693, -0.6103827953, -0.6152315736, -0.6200572252, -0.6248595119, -0.6296382546, -0.6343932748, -0.6391244531, -0.6438315511, -0.64851439, -0.6531728506, -0.6578066945, -0.6624158025, -0.6669999361, -0.6715589762, -0.6760926843, -0.6806010008, -0.6850836873, -0.689540565, -0.6939714551, -0.6983762383, -0.7027547359, -0.7071067691, -0.7114322186, -0.7157308459, -0.720002532, -0.724247098, -0.728464365, -0.7326542735, -0.7368165851, -0.7409511209, -0.7450577617, -0.7491363883, -0.7531868219, -0.7572088242, -0.761202395, -0.7651672363, -0.7691033483, -0.7730104327, -0.7768884897, -0.7807372212, -0.7845565677, -0.7883464098, 
-0.7921065688, -0.7958369255, -0.7995372415, -0.8032075167, -0.8068475723, -0.81045717, -0.8140363097, -0.8175848126, -0.8211025, -0.8245893121, -0.8280450702, -0.8314695954, -0.8348628879, -0.838224709, -0.8415549994, -0.84485358, -0.8481203318, -0.851355195, -0.854557991, -0.8577286005, -0.8608669639, -0.8639728427, -0.867046237, -0.8700869679, -0.8730949759, -0.8760700822, -0.8790122271, -0.8819212914, -0.8847970963, -0.8876396418, -0.8904487491, -0.893224299, -0.8959662318, -0.8986744881, -0.9013488293, -0.903989315, -0.9065957069, -0.909168005, -0.9117060304, -0.9142097831, -0.9166790843, -0.9191138744, -0.9215140343, -0.9238795042, -0.9262102246, -0.9285060763, -0.9307669401, -0.932992816, -0.9351835251, -0.9373390079, -0.9394592047, -0.9415440559, -0.9435934424, -0.9456073046, -0.9475855827, -0.9495281577, -0.9514350295, -0.9533060193, -0.9551411867, -0.9569403529, -0.9587034583, -0.9604305029, -0.9621214271, -0.963776052, -0.9653944373, -0.9669764638, -0.9685220718, -0.9700312614, -0.9715039134, -0.9729399681, -0.974339366, -0.975702107, -0.9770281315, -0.97831738, -0.9795697927, -0.9807852507, -0.9819638729, -0.9831054807, -0.9842100739, -0.9852776527, -0.9863080978, -0.9873014092, -0.988257587, -0.9891765118, -0.9900581837, -0.9909026623, -0.9917097688, -0.9924795628, -0.993211925, -0.9939069748, -0.9945645928, -0.9951847196, -0.9957674146, -0.9963126183, -0.996820271, -0.9972904325, -0.997723043, -0.9981181026, -0.9984755516, -0.9987954497, -0.9990777373, -0.9993223548, -0.9995294213, -0.9996988177, -0.9998306036, -0.9999247193, -0.9999811649}; -constant float ts12[512] = {-0, -0.01840673015, -0.03680722415, -0.05519524589, -0.07356456667, -0.09190895408, -0.1102222055, -0.1284981072, -0.1467304677, -0.1649131179, -0.1830398887, -0.201104641, -0.2191012353, -0.2370236069, -0.2548656464, -0.2726213634, -0.2902846634, -0.3078496456, -0.3253102899, -0.3426607251, -0.3598950505, -0.3770074248, -0.3939920366, -0.4108431637, -0.4275550842, -0.4441221356, 
-0.4605387151, -0.4767992198, -0.492898196, -0.5088301301, -0.5245896578, -0.5401714444, -0.5555702448, -0.5707807541, -0.5857978463, -0.6006164551, -0.6152315736, -0.6296382546, -0.6438315511, -0.6578066945, -0.6715589762, -0.6850836873, -0.6983762383, -0.7114322186, -0.724247098, -0.7368165851, -0.7491363883, -0.761202395, -0.7730104327, -0.7845565677, -0.7958369255, -0.8068475723, -0.8175848126, -0.8280450702, -0.838224709, -0.8481203318, -0.8577286005, -0.867046237, -0.8760700822, -0.8847970963, -0.893224299, -0.9013488293, -0.909168005, -0.9166790843, -0.9238795042, -0.9307669401, -0.9373390079, -0.9435934424, -0.9495281577, -0.9551411867, -0.9604305029, -0.9653944373, -0.9700312614, -0.974339366, -0.97831738, -0.9819638729, -0.9852776527, -0.988257587, -0.9909026623, -0.993211925, -0.9951847196, -0.996820271, -0.9981181026, -0.9990777373, -0.9996988177, -0.9999811649, -0.9999247193, -0.9995294213, -0.9987954497, -0.997723043, -0.9963126183, -0.9945645928, -0.9924795628, -0.9900581837, -0.9873014092, -0.9842100739, -0.9807852507, -0.9770281315, -0.9729399681, -0.9685220718, -0.963776052, -0.9587034583, -0.9533060193, -0.9475855827, -0.9415440559, -0.9351835251, -0.9285060763, -0.9215140343, -0.9142097831, -0.9065957069, -0.8986744881, -0.8904487491, -0.8819212914, -0.8730949759, -0.8639728427, -0.854557991, -0.84485358, -0.8348628879, -0.8245893121, -0.8140363097, -0.8032075167, -0.7921065688, -0.7807372212, -0.7691033483, -0.7572088242, -0.7450577617, -0.7326542735, -0.720002532, -0.7071067691, -0.6939714551, -0.6806010008, -0.6669999361, -0.6531728506, -0.6391244531, -0.6248595119, -0.6103827953, -0.5956993103, -0.5808139443, -0.5657318234, -0.5504579544, -0.534997642, -0.5193560123, -0.5035383701, -0.4875501692, -0.4713967443, -0.4550835788, -0.438616246, -0.4220002592, -0.4052413106, -0.3883450329, -0.3713172078, -0.3541635275, -0.336889863, -0.3195020258, -0.3020059466, -0.2844075263, -0.266712755, -0.2489276081, -0.2310581058, -0.2131103128, 
-0.1950903237, -0.1770042181, -0.1588581502, -0.1406582445, -0.1224106774, -0.1041216329, -0.08579730988, -0.06744392216, -0.04906767607, -0.030674804, -0.01227153838, 0.006135884672, 0.02454122901, 0.0429382585, 0.061320737, 0.07968243957, 0.09801714122, 0.1163186282, 0.1345807016, 0.1527971923, 0.1709618866, 0.1890686601, 0.2071113735, 0.2250839174, 0.2429801822, 0.2607941031, 0.27851969, 0.296150893, 0.3136817515, 0.3311063051, 0.3484186828, 0.3656129837, 0.3826834261, 0.3996241987, 0.4164295495, 0.433093816, 0.449611336, 0.4659765065, 0.4821837842, 0.4982276559, 0.514102757, 0.5298036337, 0.5453249812, 0.5606615543, 0.5758081675, 0.5907596946, 0.6055110693, 0.6200572252, 0.6343932748, 0.64851439, 0.6624158025, 0.6760926843, 0.689540565, 0.7027547359, 0.7157308459, 0.728464365, 0.7409511209, 0.7531868219, 0.7651672363, 0.7768884897, 0.7883464098, 0.7995372415, 0.81045717, 0.8211025, 0.8314695954, 0.8415549994, 0.851355195, 0.8608669639, 0.8700869679, 0.8790122271, 0.8876396418, 0.8959662318, 0.903989315, 0.9117060304, 0.9191138744, 0.9262102246, 0.932992816, 0.9394592047, 0.9456073046, 0.9514350295, 0.9569403529, 0.9621214271, 0.9669764638, 0.9715039134, 0.975702107, 0.9795697927, 0.9831054807, 0.9863080978, 0.9891765118, 0.9917097688, 0.9939069748, 0.9957674146, 0.9972904325, 0.9984755516, 0.9993223548, 0.9998306036, -0, -0.01840673015, -0.03680722415, -0.05519524589, -0.07356456667, -0.09190895408, -0.1102222055, -0.1284981072, -0.1467304677, -0.1649131179, -0.1830398887, -0.201104641, -0.2191012353, -0.2370236069, -0.2548656464, -0.2726213634, -0.2902846634, -0.3078496456, -0.3253102899, -0.3426607251, -0.3598950505, -0.3770074248, -0.3939920366, -0.4108431637, -0.4275550842, -0.4441221356, -0.4605387151, -0.4767992198, -0.492898196, -0.5088301301, -0.5245896578, -0.5401714444, -0.5555702448, -0.5707807541, -0.5857978463, -0.6006164551, -0.6152315736, -0.6296382546, -0.6438315511, -0.6578066945, -0.6715589762, -0.6850836873, -0.6983762383, -0.7114322186, 
-0.724247098, -0.7368165851, -0.7491363883, -0.761202395, -0.7730104327, -0.7845565677, -0.7958369255, -0.8068475723, -0.8175848126, -0.8280450702, -0.838224709, -0.8481203318, -0.8577286005, -0.867046237, -0.8760700822, -0.8847970963, -0.893224299, -0.9013488293, -0.909168005, -0.9166790843, -0.9238795042, -0.9307669401, -0.9373390079, -0.9435934424, -0.9495281577, -0.9551411867, -0.9604305029, -0.9653944373, -0.9700312614, -0.974339366, -0.97831738, -0.9819638729, -0.9852776527, -0.988257587, -0.9909026623, -0.993211925, -0.9951847196, -0.996820271, -0.9981181026, -0.9990777373, -0.9996988177, -0.9999811649, -0.9999247193, -0.9995294213, -0.9987954497, -0.997723043, -0.9963126183, -0.9945645928, -0.9924795628, -0.9900581837, -0.9873014092, -0.9842100739, -0.9807852507, -0.9770281315, -0.9729399681, -0.9685220718, -0.963776052, -0.9587034583, -0.9533060193, -0.9475855827, -0.9415440559, -0.9351835251, -0.9285060763, -0.9215140343, -0.9142097831, -0.9065957069, -0.8986744881, -0.8904487491, -0.8819212914, -0.8730949759, -0.8639728427, -0.854557991, -0.84485358, -0.8348628879, -0.8245893121, -0.8140363097, -0.8032075167, -0.7921065688, -0.7807372212, -0.7691033483, -0.7572088242, -0.7450577617, -0.7326542735, -0.720002532, -0.7071067691, -0.6939714551, -0.6806010008, -0.6669999361, -0.6531728506, -0.6391244531, -0.6248595119, -0.6103827953, -0.5956993103, -0.5808139443, -0.5657318234, -0.5504579544, -0.534997642, -0.5193560123, -0.5035383701, -0.4875501692, -0.4713967443, -0.4550835788, -0.438616246, -0.4220002592, -0.4052413106, -0.3883450329, -0.3713172078, -0.3541635275, -0.336889863, -0.3195020258, -0.3020059466, -0.2844075263, -0.266712755, -0.2489276081, -0.2310581058, -0.2131103128, -0.1950903237, -0.1770042181, -0.1588581502, -0.1406582445, -0.1224106774, -0.1041216329, -0.08579730988, -0.06744392216, -0.04906767607, -0.030674804, -0.01227153838, 0.006135884672, 0.02454122901, 0.0429382585, 0.061320737, 0.07968243957, 0.09801714122, 0.1163186282, 
0.1345807016, 0.1527971923, 0.1709618866, 0.1890686601, 0.2071113735, 0.2250839174, 0.2429801822, 0.2607941031, 0.27851969, 0.296150893, 0.3136817515, 0.3311063051, 0.3484186828, 0.3656129837, 0.3826834261, 0.3996241987, 0.4164295495, 0.433093816, 0.449611336, 0.4659765065, 0.4821837842, 0.4982276559, 0.514102757, 0.5298036337, 0.5453249812, 0.5606615543, 0.5758081675, 0.5907596946, 0.6055110693, 0.6200572252, 0.6343932748, 0.64851439, 0.6624158025, 0.6760926843, 0.689540565, 0.7027547359, 0.7157308459, 0.728464365, 0.7409511209, 0.7531868219, 0.7651672363, 0.7768884897, 0.7883464098, 0.7995372415, 0.81045717, 0.8211025, 0.8314695954, 0.8415549994, 0.851355195, 0.8608669639, 0.8700869679, 0.8790122271, 0.8876396418, 0.8959662318, 0.903989315, 0.9117060304, 0.9191138744, 0.9262102246, 0.932992816, 0.9394592047, 0.9456073046, 0.9514350295, 0.9569403529, 0.9621214271, 0.9669764638, 0.9715039134, 0.975702107, 0.9795697927, 0.9831054807, 0.9863080978, 0.9891765118, 0.9917097688, 0.9939069748, 0.9957674146, 0.9972904325, 0.9984755516, 0.9993223548, 0.9998306036}; -constant float ts15[512] = {-0, -0.01840673015, -0.03680722415, -0.05519524589, -0.07356456667, -0.09190895408, -0.1102222055, -0.1284981072, -0.1467304677, -0.1649131179, -0.1830398887, -0.201104641, -0.2191012353, -0.2370236069, -0.2548656464, -0.2726213634, -0.2902846634, -0.3078496456, -0.3253102899, -0.3426607251, -0.3598950505, -0.3770074248, -0.3939920366, -0.4108431637, -0.4275550842, -0.4441221356, -0.4605387151, -0.4767992198, -0.492898196, -0.5088301301, -0.5245896578, -0.5401714444, -0.5555702448, -0.5707807541, -0.5857978463, -0.6006164551, -0.6152315736, -0.6296382546, -0.6438315511, -0.6578066945, -0.6715589762, -0.6850836873, -0.6983762383, -0.7114322186, -0.724247098, -0.7368165851, -0.7491363883, -0.761202395, -0.7730104327, -0.7845565677, -0.7958369255, -0.8068475723, -0.8175848126, -0.8280450702, -0.838224709, -0.8481203318, -0.8577286005, -0.867046237, -0.8760700822, -0.8847970963, 
-0.893224299, -0.9013488293, -0.909168005, -0.9166790843, -0.9238795042, -0.9307669401, -0.9373390079, -0.9435934424, -0.9495281577, -0.9551411867, -0.9604305029, -0.9653944373, -0.9700312614, -0.974339366, -0.97831738, -0.9819638729, -0.9852776527, -0.988257587, -0.9909026623, -0.993211925, -0.9951847196, -0.996820271, -0.9981181026, -0.9990777373, -0.9996988177, -0.9999811649, -0.9999247193, -0.9995294213, -0.9987954497, -0.997723043, -0.9963126183, -0.9945645928, -0.9924795628, -0.9900581837, -0.9873014092, -0.9842100739, -0.9807852507, -0.9770281315, -0.9729399681, -0.9685220718, -0.963776052, -0.9587034583, -0.9533060193, -0.9475855827, -0.9415440559, -0.9351835251, -0.9285060763, -0.9215140343, -0.9142097831, -0.9065957069, -0.8986744881, -0.8904487491, -0.8819212914, -0.8730949759, -0.8639728427, -0.854557991, -0.84485358, -0.8348628879, -0.8245893121, -0.8140363097, -0.8032075167, -0.7921065688, -0.7807372212, -0.7691033483, -0.7572088242, -0.7450577617, -0.7326542735, -0.720002532, -0.7071067691, -0.6939714551, -0.6806010008, -0.6669999361, -0.6531728506, -0.6391244531, -0.6248595119, -0.6103827953, -0.5956993103, -0.5808139443, -0.5657318234, -0.5504579544, -0.534997642, -0.5193560123, -0.5035383701, -0.4875501692, -0.4713967443, -0.4550835788, -0.438616246, -0.4220002592, -0.4052413106, -0.3883450329, -0.3713172078, -0.3541635275, -0.336889863, -0.3195020258, -0.3020059466, -0.2844075263, -0.266712755, -0.2489276081, -0.2310581058, -0.2131103128, -0.1950903237, -0.1770042181, -0.1588581502, -0.1406582445, -0.1224106774, -0.1041216329, -0.08579730988, -0.06744392216, -0.04906767607, -0.030674804, -0.01227153838, 0.006135884672, 0.02454122901, 0.0429382585, 0.061320737, 0.07968243957, 0.09801714122, 0.1163186282, 0.1345807016, 0.1527971923, 0.1709618866, 0.1890686601, 0.2071113735, 0.2250839174, 0.2429801822, 0.2607941031, 0.27851969, 0.296150893, 0.3136817515, 0.3311063051, 0.3484186828, 0.3656129837, 0.3826834261, 0.3996241987, 0.4164295495, 0.433093816, 
0.449611336, 0.4659765065, 0.4821837842, 0.4982276559, 0.514102757, 0.5298036337, 0.5453249812, 0.5606615543, 0.5758081675, 0.5907596946, 0.6055110693, 0.6200572252, 0.6343932748, 0.64851439, 0.6624158025, 0.6760926843, 0.689540565, 0.7027547359, 0.7157308459, 0.728464365, 0.7409511209, 0.7531868219, 0.7651672363, 0.7768884897, 0.7883464098, 0.7995372415, 0.81045717, 0.8211025, 0.8314695954, 0.8415549994, 0.851355195, 0.8608669639, 0.8700869679, 0.8790122271, 0.8876396418, 0.8959662318, 0.903989315, 0.9117060304, 0.9191138744, 0.9262102246, 0.932992816, 0.9394592047, 0.9456073046, 0.9514350295, 0.9569403529, 0.9621214271, 0.9669764638, 0.9715039134, 0.975702107, 0.9795697927, 0.9831054807, 0.9863080978, 0.9891765118, 0.9917097688, 0.9939069748, 0.9957674146, 0.9972904325, 0.9984755516, 0.9993223548, 0.9998306036, -0, -0.01840673015, -0.03680722415, -0.05519524589, -0.07356456667, -0.09190895408, -0.1102222055, -0.1284981072, -0.1467304677, -0.1649131179, -0.1830398887, -0.201104641, -0.2191012353, -0.2370236069, -0.2548656464, -0.2726213634, -0.2902846634, -0.3078496456, -0.3253102899, -0.3426607251, -0.3598950505, -0.3770074248, -0.3939920366, -0.4108431637, -0.4275550842, -0.4441221356, -0.4605387151, -0.4767992198, -0.492898196, -0.5088301301, -0.5245896578, -0.5401714444, -0.5555702448, -0.5707807541, -0.5857978463, -0.6006164551, -0.6152315736, -0.6296382546, -0.6438315511, -0.6578066945, -0.6715589762, -0.6850836873, -0.6983762383, -0.7114322186, -0.724247098, -0.7368165851, -0.7491363883, -0.761202395, -0.7730104327, -0.7845565677, -0.7958369255, -0.8068475723, -0.8175848126, -0.8280450702, -0.838224709, -0.8481203318, -0.8577286005, -0.867046237, -0.8760700822, -0.8847970963, -0.893224299, -0.9013488293, -0.909168005, -0.9166790843, -0.9238795042, -0.9307669401, -0.9373390079, -0.9435934424, -0.9495281577, -0.9551411867, -0.9604305029, -0.9653944373, -0.9700312614, -0.974339366, -0.97831738, -0.9819638729, -0.9852776527, -0.988257587, -0.9909026623, 
-0.993211925, -0.9951847196, -0.996820271, -0.9981181026, -0.9990777373, -0.9996988177, -0.9999811649, -0.9999247193, -0.9995294213, -0.9987954497, -0.997723043, -0.9963126183, -0.9945645928, -0.9924795628, -0.9900581837, -0.9873014092, -0.9842100739, -0.9807852507, -0.9770281315, -0.9729399681, -0.9685220718, -0.963776052, -0.9587034583, -0.9533060193, -0.9475855827, -0.9415440559, -0.9351835251, -0.9285060763, -0.9215140343, -0.9142097831, -0.9065957069, -0.8986744881, -0.8904487491, -0.8819212914, -0.8730949759, -0.8639728427, -0.854557991, -0.84485358, -0.8348628879, -0.8245893121, -0.8140363097, -0.8032075167, -0.7921065688, -0.7807372212, -0.7691033483, -0.7572088242, -0.7450577617, -0.7326542735, -0.720002532, -0.7071067691, -0.6939714551, -0.6806010008, -0.6669999361, -0.6531728506, -0.6391244531, -0.6248595119, -0.6103827953, -0.5956993103, -0.5808139443, -0.5657318234, -0.5504579544, -0.534997642, -0.5193560123, -0.5035383701, -0.4875501692, -0.4713967443, -0.4550835788, -0.438616246, -0.4220002592, -0.4052413106, -0.3883450329, -0.3713172078, -0.3541635275, -0.336889863, -0.3195020258, -0.3020059466, -0.2844075263, -0.266712755, -0.2489276081, -0.2310581058, -0.2131103128, -0.1950903237, -0.1770042181, -0.1588581502, -0.1406582445, -0.1224106774, -0.1041216329, -0.08579730988, -0.06744392216, -0.04906767607, -0.030674804, -0.01227153838, 0.006135884672, 0.02454122901, 0.0429382585, 0.061320737, 0.07968243957, 0.09801714122, 0.1163186282, 0.1345807016, 0.1527971923, 0.1709618866, 0.1890686601, 0.2071113735, 0.2250839174, 0.2429801822, 0.2607941031, 0.27851969, 0.296150893, 0.3136817515, 0.3311063051, 0.3484186828, 0.3656129837, 0.3826834261, 0.3996241987, 0.4164295495, 0.433093816, 0.449611336, 0.4659765065, 0.4821837842, 0.4982276559, 0.514102757, 0.5298036337, 0.5453249812, 0.5606615543, 0.5758081675, 0.5907596946, 0.6055110693, 0.6200572252, 0.6343932748, 0.64851439, 0.6624158025, 0.6760926843, 0.689540565, 0.7027547359, 0.7157308459, 0.728464365, 
0.7409511209, 0.7531868219, 0.7651672363, 0.7768884897, 0.7883464098, 0.7995372415, 0.81045717, 0.8211025, 0.8314695954, 0.8415549994, 0.851355195, 0.8608669639, 0.8700869679, 0.8790122271, 0.8876396418, 0.8959662318, 0.903989315, 0.9117060304, 0.9191138744, 0.9262102246, 0.932992816, 0.9394592047, 0.9456073046, 0.9514350295, 0.9569403529, 0.9621214271, 0.9669764638, 0.9715039134, 0.975702107, 0.9795697927, 0.9831054807, 0.9863080978, 0.9891765118, 0.9917097688, 0.9939069748, 0.9957674146, 0.9972904325, 0.9984755516, 0.9993223548, 0.9998306036}; -constant float ts20[512] = {-0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, 
-0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, 
-0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, 
-0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, 
-0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607}; -constant float ts23[512] = {-0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, 
-0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, 
-0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, 
-0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607, -0, -0.04906767607, -0.09801714122, -0.1467304677, -0.1950903237, -0.2429801822, -0.2902846634, -0.336889863, -0.3826834261, -0.4275550842, -0.4713967443, -0.514102757, -0.5555702448, -0.5956993103, -0.6343932748, -0.6715589762, -0.7071067691, -0.7409511209, -0.7730104327, -0.8032075167, -0.8314695954, -0.8577286005, -0.8819212914, -0.903989315, -0.9238795042, -0.9415440559, -0.9569403529, -0.9700312614, -0.9807852507, -0.9891765118, -0.9951847196, -0.9987954497, -1, -0.9987954497, -0.9951847196, -0.9891765118, -0.9807852507, -0.9700312614, -0.9569403529, -0.9415440559, -0.9238795042, -0.903989315, -0.8819212914, -0.8577286005, -0.8314695954, -0.8032075167, -0.7730104327, -0.7409511209, -0.7071067691, -0.6715589762, -0.6343932748, -0.5956993103, -0.5555702448, -0.514102757, -0.4713967443, -0.4275550842, -0.3826834261, -0.336889863, -0.2902846634, -0.2429801822, -0.1950903237, -0.1467304677, -0.09801714122, -0.04906767607}; -constant float ts21[512] = {-0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, 
-0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, 
-0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, 
-0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, 
-0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177}; -constant float ts24[512] = {-0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, 
-0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, 
-0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, 
-0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, -0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177, -0, -0.02454122901, -0.04906767607, -0.07356456667, -0.09801714122, -0.1224106774, -0.1467304677, -0.1709618866, -0.1950903237, -0.2191012353, -0.2429801822, -0.266712755, 
-0.2902846634, -0.3136817515, -0.336889863, -0.3598950505, -0.3826834261, -0.4052413106, -0.4275550842, -0.449611336, -0.4713967443, -0.492898196, -0.514102757, -0.534997642, -0.5555702448, -0.5758081675, -0.5956993103, -0.6152315736, -0.6343932748, -0.6531728506, -0.6715589762, -0.689540565, -0.7071067691, -0.724247098, -0.7409511209, -0.7572088242, -0.7730104327, -0.7883464098, -0.8032075167, -0.8175848126, -0.8314695954, -0.84485358, -0.8577286005, -0.8700869679, -0.8819212914, -0.893224299, -0.903989315, -0.9142097831, -0.9238795042, -0.932992816, -0.9415440559, -0.9495281577, -0.9569403529, -0.963776052, -0.9700312614, -0.975702107, -0.9807852507, -0.9852776527, -0.9891765118, -0.9924795628, -0.9951847196, -0.9972904325, -0.9987954497, -0.9996988177}; -constant float ts22[512] = {-0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, 
-0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, 
-0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, 
-0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 
0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325}; -constant float ts25[512] = {-0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 
0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, 
-0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, 
-0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325, -0, -0.07356456667, -0.1467304677, -0.2191012353, -0.2902846634, -0.3598950505, -0.4275550842, -0.492898196, -0.5555702448, -0.6152315736, -0.6715589762, -0.724247098, -0.7730104327, -0.8175848126, -0.8577286005, -0.893224299, -0.9238795042, -0.9495281577, -0.9700312614, -0.9852776527, -0.9951847196, -0.9996988177, -0.9987954497, -0.9924795628, -0.9807852507, -0.963776052, -0.9415440559, -0.9142097831, -0.8819212914, -0.84485358, -0.8032075167, -0.7572088242, -0.7071067691, -0.6531728506, -0.5956993103, -0.534997642, -0.4713967443, -0.4052413106, -0.336889863, -0.266712755, -0.1950903237, -0.1224106774, -0.04906767607, 0.02454122901, 0.09801714122, 0.1709618866, 0.2429801822, 0.3136817515, 0.3826834261, 0.449611336, 0.514102757, 0.5758081675, 0.6343932748, 0.689540565, 0.7409511209, 0.7883464098, 0.8314695954, 0.8700869679, 0.903989315, 0.932992816, 0.9569403529, 0.975702107, 0.9891765118, 0.9972904325}; -constant float ts30[512] = {-0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, 
-0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, 
-0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, 
-0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, 
-0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237}; -constant float ts33[512] = {-0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, 
-0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, 
-0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, 
-0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237, -0, -0.1950903237, -0.3826834261, -0.5555702448, -0.7071067691, -0.8314695954, -0.9238795042, -0.9807852507, -1, -0.9807852507, -0.9238795042, -0.8314695954, -0.7071067691, -0.5555702448, -0.3826834261, -0.1950903237}; -constant float ts31[512] = {-0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, 
-0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, 
-0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, 
-0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, 
-0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196}; -constant float ts34[512] = {-0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, 
-0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, 
-0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, 
-0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, 
-0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196, -0, -0.09801714122, -0.1950903237, -0.2902846634, -0.3826834261, -0.4713967443, -0.5555702448, -0.6343932748, -0.7071067691, -0.7730104327, -0.8314695954, -0.8819212914, -0.9238795042, -0.9569403529, -0.9807852507, -0.9951847196}; -constant float ts32[512] = {-0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 
0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 
0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, 
-0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529}; -constant float ts35[512] = {-0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, 
-0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, 
-0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, 
-0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 
0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529, -0, -0.2902846634, -0.5555702448, -0.7730104327, -0.9238795042, -0.9951847196, -0.9807852507, -0.8819212914, -0.7071067691, -0.4713967443, -0.1950903237, 0.09801714122, 0.3826834261, 0.6343932748, 0.8314695954, 0.9569403529}; -constant float ts40[512] = {-0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, 
-0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, 
-0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691}; -constant float ts43[512] = {-0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, 
-0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, 
-0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, 
-0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691, -0, -0.7071067691, -1, -0.7071067691}; -constant float ts41[512] = {-0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, 
-0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, 
-0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, 
-0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042}; -constant float ts44[512] = {-0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, 
-0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, 
-0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, 
-0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042, -0, -0.3826834261, -0.7071067691, -0.9238795042}; -constant float ts42[512] = {-0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 
0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, 
-0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 
0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261}; -constant float ts45[512] = {-0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 
0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, 
-0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 
0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261, -0, -0.9238795042, -0.7071067691, 0.3826834261}; \ No newline at end of file diff --git a/kernels/matrixTranspose/diagonal_bitrev.cl b/kernels/matrixTranspose/diagonal_bitrev.cl index 6eec735..fcbc6e5 100644 --- a/kernels/matrixTranspose/diagonal_bitrev.cl 
+++ b/kernels/matrixTranspose/diagonal_bitrev.cl @@ -220,4 +220,4 @@ float2x8 readBuf_fetch(float2 buf[DEPTH][POINTS], unsigned step, unsigned delay) data.i7 = rotate_out[7]; return data; -} \ No newline at end of file +} diff --git a/tests/test_fft1d_fpga.cpp b/tests/test_fft1d_fpga.cpp index f75bf38..c7ab436 100644 --- a/tests/test_fft1d_fpga.cpp +++ b/tests/test_fft1d_fpga.cpp @@ -49,7 +49,7 @@ TEST(fft1dFPGATest, InputValiditySVM){ fft_time = fftfpgaf_c2c_1d_svm(N, test, test, false, 1); EXPECT_EQ(fft_time.valid, 0); - int isInit = fpga_initialize("intel(r) fpga sdk for opencl(tm)", "p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d.aocx", true); + int isInit = fpga_initialize("Intel(R) FPGA Emulation Platform for OpenCL(TM)", "p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d.aocx", true); ASSERT_EQ(isInit, 0); // null inp ptr input diff --git a/tests/test_fft_setup.cpp b/tests/test_fft_setup.cpp index ab1efe9..9e7b5d7 100644 --- a/tests/test_fft_setup.cpp +++ b/tests/test_fft_setup.cpp @@ -21,7 +21,7 @@ TEST(fftFPGASetupTest, ValidInit){ EXPECT_EQ(fpga_initialize("TEST", "fft1d_emulate.aocx", false), -2); // wrong path argument - const char* platform_name = "intel(r) fpga sdk for opencl(tm)"; + const char* platform_name = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; EXPECT_EQ(fpga_initialize(platform_name, "TEST", false), -4); // right path and platform names diff --git a/tests/test_opencl_utils.cpp b/tests/test_opencl_utils.cpp index 55a58a8..e537823 100755 --- a/tests/test_opencl_utils.cpp +++ b/tests/test_opencl_utils.cpp @@ -31,7 +31,7 @@ TEST_F(OpenCLUtilsTest, FindValidPlatform){ EXPECT_EQ(findPlatform("test"), nullptr); // correct platform name - pl_id = findPlatform("intel(r) fpga sdk for opencl(tm)"); + pl_id = findPlatform("Intel(R) FPGA Emulation Platform for OpenCL(TM)"); ASSERT_NE(pl_id, nullptr); } From 4fab1ae3a4b59f654ea1aa5844c9a75e1d54cb31 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Tue, 12 Oct 2021 18:36:53 +0200 Subject: 
[PATCH 57/76] gitlab ci update --- .gitlab-ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d70594f..ed6e797 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -27,8 +27,6 @@ test-all: stage: test script: - cd build/bin/ - - ldd test_fftfpga - - ldd fftfpga - CL_CONFIG_CPU_EMULATE_DEVICES=1 ./test_fftfpga dependencies: - build-all From c06c22214402fd6016f54b23eb7a1805f5f9cb3f Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:13:12 +0200 Subject: [PATCH 58/76] updated readme --- README.md | 154 ++++++++++++------------------------------------------ 1 file changed, 33 insertions(+), 121 deletions(-) diff --git a/README.md b/README.md index f4e9a1a..b54455e 100644 --- a/README.md +++ b/README.md @@ -13,55 +13,29 @@ This repository provides OpenCL host code in the form of FFTW like APIs, which c - Single Precision (32 bit floating point) - C2C: Complex input to complex output - Out-of-place transforms +- Batched 3D transforms +- OpenCL Shared Virtual Memory (SVM) extensions for data transfers ## Supported FPGAs -The library has been tested on the following FPGAs: +This library has been tested using the following FPGAs present in the [Noctua](https://pc2.uni-paderborn.de/hpc-services/available-systems/noctua1/) cluster of the Paderborn Center for Parallel Computing (PC2) at Paderborn University: -- Intel Stratix 10 GX 2800 -- Intel Arria 10 +- [Bittware 520N](https://www.bittware.com/fpga/520n/) card with Intel Stratix 10 GX 2800 FPGA +- [Intel FPGA PAC D5005](https://www.intel.com/content/www/us/en/programmable/products/boards_and_kits/dev-kits/altera/intel-fpga-pac-d5005/overview.html) card with Intel Stratix 10 SX 2800 FPGA ## Who is using FFTFPGA? - [CP2K](https://github.com/cp2k/cp2k): the quantum chemistry software package has an interface to offload 3d FFTs to Intel FPGAs that uses the OpenCL kernel designs of FFTFPGA. 
-## Getting Started - - -### Dependencies +## Quick Setup +First, the *dependencies* for building the system: - [CMake](https://cmake.org/) >= 3.10 -- C Compiler with C11 support -- Intel OpenCL FPGA SDK - -Additional submodules used: - -- [argparse](https://github.com/cofyc/argparse.git) for command line argument parsing -- [hlslib](https://github.com/definelicht/hlslib) for CMake Intel FPGA OpenCL find packages -- [findFFTW](https://github.com/egpbos/findFFTW.git) for CMake FFTW find package -- [gtest](https://github.com/google/googletest.git) for unit tests - -### Structure - -The repository consists of the following: - -- `api` : host code to setup and execute FPGA bitstreams. Compiled to static library that can be linked to your application -- `kernels` : OpenCL kernel code for 1d, 2d and 3d FFT -- `examples`: Sample code that makes use of the api -- `extern` : external packages as submodules required to run the project -- `cmake` : cmake modules used by the build system -- `scripts`: convenience slurm scripts -- `docs` : describes models regarding performance and resource utilization -- `data` : evaluation results and measurements - -### Setup - -FFTFPGA has a CMake build script that can be used to build the project. This consists of two steps: - -1. Building the API that can be linked to your application -2. Building OpenCL Kernel Designs that are used by the API +- C++ compiler with C++11 support (GCC 4.9.0+) +- Intel FPGA SDK for OpenCL +- FFTW3 -#### API +Once you have this covered, execute the following: ```bash mkdir build && cd build # Directory to store build outputs @@ -69,118 +43,56 @@ cmake .. make ``` -This generates the following: +You have now built the *API*, i.e., the OpenCL host code that correctly invokes the different transformations, packed into a static library. This must be linked to an application. -- `fftfpga` static library to link such as `-lfftfpga` -- `fftfpga/fftfpga.h` header file +A sample application that helps invoke the APIs is built as well. 
-The sample programs given in the `example` directory are also compiled to binaries of their respective names, which makes use of the files given previously. +*Strictly speaking*, you have built the following: -#### OpenCL Kernel Designs +- `fftfpga` static library, linked with e.g. `-lfftfpga` +- `fftfpga/fftfpga.h` header file +- `fft` - a sample application which links and includes the above two. -FFTFPGA provides OpenCL designs that can be compiled for different options: +Now, for the real deal: synthesizing the OpenCL FFT kernels. These can be synthesized to run in software emulation or on hardware as bitstreams. - Emulation ```bash make _emu -make fft1d_emulate +make fft3d_ddr_emulate ``` -- Report Generation - -```bash -make _rep -make fft1d_rep -``` - -- Synthesis +- Hardware Bitstream ```bash make _syn -make fft1d_syn +make fft3d_ddr_syn ``` -Paths to these bitstreams should be provided as parameters to certain API calls to execute the design. - -### Examples - -#### Additional Dependency - -- FFTW3 - -#### Execution +Putting it all together, pass the path to the synthesized bitstream, along with the other configuration options, as command line parameters to the generated sample application to execute the transformation. ```bash -./fft3d -n 64 -m -s -p emu_64_fft3d_bram/fft3d_bram.aocx +./fft --num=64 --dim=3 --path=fft3d_ddr_128.aocx ``` -Prepend the command with `CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1`for emulation. - -#### Compile Definitions +*Tip*: for emulation, use the `--emulate` command line parameter. -- `LOG_SIZE`: set the log of the length of the matrix. Example: `-DLOG_SIZE=6`. 
- -#### Runtime Input Parameters - -```bash - -h, --help show this help message and exit - -Basic Options - -n, --n= FFT Points - -s, --sp Single Precision - -i, --iter= Iterations - -b, --back Backward FFT - -v, --svm Use SVM - -m, --bram Use BRAM - -p, --path= Path to bitstreamm -``` +For explanations regarding the command line options and the OpenCL kernels, check out x and y in the advanced guide. -#### Output - -The examples measure and output relevant performance metrics that are shown below: - -```bash ------------------------------------------- -FFT Configuration: --------------------------------------------- -Type = Complex to Complex -Points = 64 -Precision = Single -Direction = Forward -Placement = In Place -Iterations = 1 --------------------------------------------- - - Initializing FPGA ... - Getting program binary from path emu_64_fft3d_bram/fft3d_bram.aocx ... - Building program ... - FFT kernel initialization is complete. - Cleaning up FPGA resources ... - ------------------------------------------- -Measurements --------------------------------------------- -Points = 64 -Precision = Single -Direction = Forward -PCIe Write = 0.03ms -Kernel Execution = 0.48ms -PCIe Read = 0.02ms -Throughput = 0.00GFLOPS/s | 0.00 GB/s -``` +## Publications -- `PCIe Write` and `PCIe Read` the time taken in milliseconds for transfer of data from host to global memory through PCIe bus. +FFTFPGA has been cited in the following publications: -- `Kernel Execution` represents the time taken in milliseconds for the execution of the OpenCL implementation that includes the global memory accesses. +1. Evaluating the Design Space for Offloading 3D FFT Calculations to an FPGA for High-Performance Computing : https://doi.org/10.1007/978-3-030-79025-7_21 -## Publications +2. 
CP2K: An electronic structure and molecular dynamics software package - Quickstep: Efficient and accurate electronic structure calculations: https://doi.org/10.1063/5.0007045 -FFTFPGA has been cited in the following publications: +3. Efficient Ab-Initio Molecular Dynamic Simulations by Offloading Fast Fourier Transformations to FPGAs : https://doi.org/10.1109/FPL50879.2020.00065 -1. CP2K: An electronic structure and molecular dynamics software package - Quickstep: Efficient and accurate electronic structure calculations: https://doi.org/10.1063/5.0007045 +## Related Repositories -2. Efficient Ab-Initio Molecular Dynamic Simulations by Offloading Fast Fourier Transformations to FPGAs ([preprint](https://arxiv.org/abs/2006.08435)) +- ConvFPGA - an OpenCL based library for FFT-based convolution on FPGAs +- FFTFPGA-eval ## Contact From 1a97c92ce0056bcbfa024c22c587643083d39e7b Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:13:25 +0200 Subject: [PATCH 59/76] updated changelog --- CHANGELOG | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 56b29f4..7e4b868 100755 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,19 +2,12 @@ All notable changes to this project will be documented in this file. 
## Unreleased -- configurable platform name -- choice of platform (currently chooses first platform) -- choice of device (currently chooses first device) -- loads binary to multiple devices (currently only the first device) -- batch mode -- xilinx fpgas +- configurable CL platform and device ## [2.0.0] - [] -- SVM API: coarse grained SVM support for Intel FPGAs -- Doxygen Doc -- Emulator option using runtime cmd line args -- Command line argument to enable and disable burst interleaved global memory accesses +- Batched 3D FFT to schedule multiple transformations by overlapping data transfers and FFT computations +- Using OpenCL Shared Virtual Memory (SVM) for data transfers between FPGA and host ## [1.0.0] - [16.06.2020] From 4a8bec086fb5004f60fd319f4a04a3b52f3200f4 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:14:07 +0200 Subject: [PATCH 60/76] fixed args in fft1d, fft2d --- api/include/fftfpga/fftfpga.h | 6 +++--- api/src/fft1d.c | 2 -- api/src/fft2d.c | 26 ++++++-------------------- 3 files changed, 9 insertions(+), 25 deletions(-) diff --git a/api/include/fftfpga/fftfpga.h b/api/include/fftfpga/fftfpga.h index 31e3935..06f0b42 100755 --- a/api/include/fftfpga/fftfpga.h +++ b/api/include/fftfpga/fftfpga.h @@ -117,7 +117,7 @@ extern fpga_t fftfpgaf_c2c_1d_svm(const unsigned N, const float2 *inp, float2 *o * @param how_many : number of 2D FFTs to computer, default 1 * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int how_many); +extern fpga_t fftfpgaf_c2c_2d_bram(const unsigned N, const float2 *inp, float2 *out, const bool inv, const bool interleaving, const unsigned how_many); /** * @brief compute an out-of-place single precision complex 2DFFT using the BRAM of the FPGA and Shared Virtual Memory for Host to Device Communication @@ -128,7 +128,7 @@ extern fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 
*inp, float2 *out, bool i * @param how_many : number of 2D FFTs to computer, default 1 * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_bram_svm(int N, const float2 *inp, float2 *out, bool inv, int how_many); +extern fpga_t fftfpgaf_c2c_2d_bram_svm(const unsigned N, const float2 *inp, float2 *out, const bool inv, const unsigned how_many); /** * @brief compute an out-of-place single precision complex 2D-FFT using the DDR of the FPGA @@ -138,7 +138,7 @@ extern fpga_t fftfpgaf_c2c_2d_bram_svm(int N, const float2 *inp, float2 *out, bo * @param inv : int toggle to activate backward FFT * @return fpga_t : time taken in milliseconds for data transfers and execution */ -extern fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv); +extern fpga_t fftfpgaf_c2c_2d_ddr(const unsigned N, const float2 *inp, float2 *out, const bool inv); /** * @brief compute an out-of-place single precision complex 3D-FFT using the BRAM of the FPGA diff --git a/api/src/fft1d.c b/api/src/fft1d.c index 7c64122..88fda49 100644 --- a/api/src/fft1d.c +++ b/api/src/fft1d.c @@ -265,8 +265,6 @@ fpga_t fftfpgaf_c2c_1d_svm(const unsigned N, const float2 *inp, float2 *out, con return fft_time; } - printf("-- Launching%s 1D FFT of %d batches using SVM\n", inv ? 
" inverse":"", batch); - // Can't pass bool to device, so convert it to int int inverse_int = (int)inv; diff --git a/api/src/fft2d.c b/api/src/fft2d.c index 4cec976..78359a0 100644 --- a/api/src/fft2d.c +++ b/api/src/fft2d.c @@ -24,7 +24,7 @@ * \param iter : int toggle to activate backward FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv){ +fpga_t fftfpgaf_c2c_2d_ddr(const unsigned N, const float2 *inp, float2 *out, const bool inv){ fpga_t fft_time = {0.0, 0.0, 0.0, 0.0, 0}; cl_kernel fetch_kernel = NULL, fft_kernel = NULL, transpose_kernel = NULL; cl_int status = 0; @@ -35,10 +35,6 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv){ return fft_time; } -#ifdef VERBOSE - printf("Launching%s 2d FFT transform \n", inv ? " inverse":""); -#endif - queue_setup(); cl_mem d_inData, d_outData, d_tmp; @@ -171,24 +167,20 @@ fpga_t fftfpgaf_c2c_2d_ddr(int N, const float2 *inp, float2 *out, bool inv){ * \param interleaving : enable interleaved global memory buffers * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, bool interleaving, int how_many){ +fpga_t fftfpgaf_c2c_2d_bram(const unsigned N, const float2 *inp, float2 *out, const bool inv, const bool interleaving, const unsigned how_many){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; cl_kernel fetch_kernel = NULL, store_kernel = NULL; cl_kernel transpose_kernel = NULL; cl_int status = 0; - int num_pts = how_many * N * N; + unsigned num_pts = how_many * N * N; // if N is not a power of 2 if(inp == NULL || out == NULL || ( (N & (N-1)) !=0)){ return fft_time; } -#ifdef VERBOSE - printf("Launching%s 3d FFT transform in DDR \n", inv ? 
" inverse":""); -#endif - queue_setup(); cl_mem_flags flagbuf1, flagbuf2; @@ -217,7 +209,6 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, boo status = clFinish(queue1); checkError(status, "failed to finish"); - cl_ulong writeBuf_start = 0.0, writeBuf_end = 0.0; clGetEventProfilingInfo(writeBuf_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &writeBuf_start, NULL); @@ -354,23 +345,18 @@ fpga_t fftfpgaf_c2c_2d_bram(int N, const float2 *inp, float2 *out, bool inv, boo * \param inv : int toggle to activate backward FFT * \return fpga_t : time taken in milliseconds for data transfers and execution */ -fpga_t fftfpgaf_c2c_2d_bram_svm(int N, const float2 *inp, float2 *out, bool inv, int how_many){ +fpga_t fftfpgaf_c2c_2d_bram_svm(const unsigned N, const float2 *inp, float2 *out, const bool inv, const unsigned how_many){ fpga_t fft_time = {0.0, 0.0, 0.0, 0}; cl_int status = 0; - int num_pts = how_many * N * N; + unsigned num_pts = how_many * N * N; cl_kernel ffta_kernel = NULL, fftb_kernel = NULL; cl_kernel fetch_kernel = NULL, store_kernel = NULL; cl_kernel transpose_kernel = NULL; // if N is not a power of 2 - if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (!svm_enabled)){ + if(inp == NULL || out == NULL || ( (N & (N-1)) !=0) || (!svm_enabled)) return fft_time; - } - -#ifdef VERBOSE - printf("Launching%s 2d FFT transform in BRAM using SVM\n", inv ? 
" inverse":""); -#endif queue_setup(); From 3dae9e1fd27306c5a4c5ffc384a2aa6f56aa0cfe Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:14:47 +0200 Subject: [PATCH 61/76] fixed ddr batch and renamed queues --- api/src/fft3d.c | 161 +++++++++++++++++++++++++++--------------------- 1 file changed, 90 insertions(+), 71 deletions(-) diff --git a/api/src/fft3d.c b/api/src/fft3d.c index f2ced99..8395d0d 100644 --- a/api/src/fft3d.c +++ b/api/src/fft3d.c @@ -135,19 +135,19 @@ fpga_t fftfpgaf_c2c_3d_bram(const unsigned N, const float2 *inp, float2 *out, co // Wait for all command queues to complete pending events status = clFinish(queue1); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue1"); status = clFinish(queue2); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue2"); status = clFinish(queue3); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue3"); status = clFinish(queue4); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue4"); status = clFinish(queue5); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue5"); status = clFinish(queue6); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue6"); status = clFinish(queue7); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue7"); cl_ulong kernel_start = 0, kernel_end = 0; @@ -320,19 +320,19 @@ fpga_t fftfpgaf_c2c_3d_ddr(const unsigned N, const float2 *inp, float2 *out, con checkError(status, "Failed to launch fetch kernel"); status = clFinish(queue1); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue1"); status = clFinish(queue2); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue2"); status = clFinish(queue3); - checkError(status, "failed to finish"); + checkError(status, "failed to 
finish queue3"); status = clFinish(queue4); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue4"); status = clFinish(queue5); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue5"); status = clFinish(queue6); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue6"); status = clFinish(queue7); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue7"); cl_ulong kernel_start = 0, kernel_end = 0; clGetEventProfilingInfo(startExec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start, NULL); @@ -494,7 +494,7 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(const unsigned N, const float2 *inp, float2 *ou status = clEnqueueWriteBuffer(queue1, d_inData1, CL_TRUE, 0, sizeof(float2) * num_pts, inp, 0, NULL, NULL); status = clFinish(queue1); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue1"); // Second Phase // Unblocking write to DDR second buffer from index num_pts @@ -503,12 +503,6 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(const unsigned N, const float2 *inp, float2 *ou checkError(status, "Failed to write to DDR buffer"); // Compute First FFT already transferred - status = clEnqueueTask(queue3, store_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fetch kernel"); @@ -531,21 +525,28 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(const unsigned N, const float2 *inp, float2 *ou status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose3D_kernel kernel"); + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fft kernel"); + + status = clEnqueueTask(queue3, store_kernel, 0, 
NULL, NULL); + checkError(status, "Failed to launch store kernel"); + // Check finish of transfer and computations clWaitForEvents(1, &write_event[0]); clReleaseEvent(write_event[0]); - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); - status = clFinish(queue3); - checkError(status, "failed to finish"); - status = clFinish(queue4); - checkError(status, "failed to finish"); - status = clFinish(queue5); - checkError(status, "failed to finish"); + status = clFinish(queue6); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue 6"); + status = clFinish(queue5); + checkError(status, "failed to finish queue 5"); + status = clFinish(queue4); + checkError(status, "failed to finish queue 4"); + status = clFinish(queue3); + checkError(status, "failed to finish queue 3"); + status = clFinish(queue2); + checkError(status, "failed to finish queue 2"); + status = clFinish(queue1); + checkError(status, "failed to finish queue 1"); // Loop over the 3 stages for(size_t i = 0; i < how_many-2; i++){ @@ -645,15 +646,32 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(const unsigned N, const float2 *inp, float2 *ou } // Set Kernel Arguments before execution - status = clEnqueueTask(queue3, store_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, 
NULL); checkError(status, "Failed to launch transpose3D_kernel kernel"); + status = clFinish(queue1); + checkError(status, "failed to finish queue 1"); + status = clFinish(queue2); + checkError(status, "failed to finish queue 2"); + status = clFinish(queue3); + checkError(status, "failed to finish queue 3"); + status = clFinish(queue4); + checkError(status, "failed to finish queue 4"); + status = clFinish(queue5); + checkError(status, "failed to finish queue 5"); + mode = RD_GLOBALMEM; status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); checkError(status, "Failed to set transpose3D kernel arg 2"); @@ -661,32 +679,26 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(const unsigned N, const float2 *inp, float2 *ou status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose3D_kernel kernel"); - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + status = clEnqueueTask(queue3, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); status = clFinish(queue1); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue1"); status = clFinish(queue2); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue2"); status = clFinish(queue3); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue3"); status = clFinish(queue4); - checkError(status, "failed to finish"); + checkError(status, "failed to finish 
queue4"); status = clFinish(queue5); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue5"); status = clFinish(queue6); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue6"); status = clFinish(queue7); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue7"); clWaitForEvents(2, write_event); clReleaseEvent(write_event[0]); @@ -774,15 +786,32 @@ fpga_t fftfpgaf_c2c_3d_ddr_batch(const unsigned N, const float2 *inp, float2 *ou checkError(status, "Failed to set store2 kernel arg"); } - status = clEnqueueTask(queue3, store_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); + status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch fetch kernel"); - status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); + status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch transpose kernel"); + + status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch second fft kernel"); + status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose3D_kernel kernel"); + status = clFinish(queue1); + checkError(status, "failed to finish queue 1"); + status = clFinish(queue2); + checkError(status, "failed to finish queue 2"); + status = clFinish(queue3); + checkError(status, "failed to finish queue 3"); + status = clFinish(queue4); + checkError(status, "failed to finish queue 4"); + status = clFinish(queue5); + checkError(status, "failed to finish queue 5"); + mode = RD_GLOBALMEM; status=clSetKernelArg(transpose3D_kernel, 2, sizeof(cl_int), (void*)&mode); checkError(status, "Failed to set transpose3D kernel arg 2"); @@ -790,32 +819,22 @@ fpga_t 
fftfpgaf_c2c_3d_ddr_batch(const unsigned N, const float2 *inp, float2 *ou status = clEnqueueTask(queue5, transpose3D_kernel, 0, NULL, NULL); checkError(status, "Failed to launch transpose3D_kernel kernel"); - status = clEnqueueTask(queue4, fftb_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch second fft kernel"); - - status = clEnqueueTask(queue3, transpose_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch transpose kernel"); - - status = clEnqueueTask(queue2, ffta_kernel, 0, NULL, NULL); + status = clEnqueueTask(queue4, fftc_kernel, 0, NULL, NULL); checkError(status, "Failed to launch fft kernel"); - status = clEnqueueTask(queue1, fetch_kernel, 0, NULL, NULL); - checkError(status, "Failed to launch fetch kernel"); + status = clEnqueueTask(queue3, store_kernel, 0, NULL, NULL); + checkError(status, "Failed to launch store kernel"); clWaitForEvents(1, &write_event[0]); clReleaseEvent(write_event[0]); - status = clFinish(queue1); - checkError(status, "failed to finish"); - status = clFinish(queue2); - checkError(status, "failed to finish"); status = clFinish(queue3); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue3"); status = clFinish(queue4); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue4"); status = clFinish(queue5); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue5"); status = clFinish(queue6); - checkError(status, "failed to finish"); + checkError(status, "failed to finish queue6"); if( (how_many % 4) == 0){ status = clEnqueueReadBuffer(queue6, d_outData4, CL_FALSE, 0, sizeof(float2) * num_pts, &out[(how_many - 1) * num_pts], 0, NULL, &write_event[0]); From a6146800295f85a380985466c7da200802c3a5f9 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:15:05 +0200 Subject: [PATCH 62/76] added additional variants to application selection --- examples/fft.cpp | 14 +++++++++++--- 1 file changed, 11 
insertions(+), 3 deletions(-) diff --git a/examples/fft.cpp b/examples/fft.cpp index 15476a7..8d50cb8 100644 --- a/examples/fft.cpp +++ b/examples/fft.cpp @@ -36,11 +36,19 @@ int main(int argc, char* argv[]){ const bool burst = config.burst; for(unsigned i = 0; i < config.iter; i++){ + cout << i << ": Calculating FFT - " << endl; switch(config.dim) { - case 1: runtime[i] = fftfpgaf_c2c_1d(num, inp, out, inv, config.batch); - break; + case 1: { + if(config.use_usm) + runtime[i] = fftfpgaf_c2c_1d_svm(num, inp, out, inv, config.batch); + else + runtime[i] = fftfpgaf_c2c_1d(num, inp, out, inv, config.batch); + break; + } case 2: { - if(config.use_bram) + if(config.use_bram && config.use_usm) + runtime[i] = fftfpgaf_c2c_2d_bram_svm(num, inp, out, inv, config.batch); + else if(config.use_bram && !config.use_usm) runtime[i] = fftfpgaf_c2c_2d_bram(num, inp, out, inv, burst, config.batch); else runtime[i] = fftfpgaf_c2c_2d_ddr(num, inp, out, inv); From 2ec2aa7bff83b05918e689a332bb6faf6fcfb6fb Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:15:30 +0200 Subject: [PATCH 63/76] expressive emulation and report generation --- cmake/genKernelTargets.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/genKernelTargets.cmake b/cmake/genKernelTargets.cmake index 4fc72b5..6474677 100644 --- a/cmake/genKernelTargets.cmake +++ b/cmake/genKernelTargets.cmake @@ -32,7 +32,7 @@ function(gen_fft_targets) VERBATIM ) - add_custom_target(${kernel_fname}_emu + add_custom_target(${kernel_fname}_emulate DEPENDS ${EMU_BSTREAM} ${CL_SRC} ${CL_HEADER} COMMENT "Building ${kernel_fname} for emulation to folder ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" @@ -45,7 +45,7 @@ function(gen_fft_targets) VERBATIM ) - add_custom_target(${kernel_fname}_rep + add_custom_target(${kernel_fname}_report DEPENDS ${REP_BSTREAM} ${CL_SRC} ${CL_HEADER} COMMENT "Building a report for ${kernel_fname} to folder ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" From 
7c008f8279e8699172383286c6bf9d02c6d7da82 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:16:07 +0200 Subject: [PATCH 64/76] cleaned up application files --- examples/common/helper.c | 152 -------------------------------- examples/common/helper.h | 18 ---- examples/common/verify_fftw.c | 97 --------------------- examples/common/verify_fftw.h | 10 --- examples/fft1d.c | 123 -------------------------- examples/fft1d_svm.c | 121 -------------------------- examples/fft2d.c | 142 ------------------------------ examples/fft2d_bram_svm.c | 131 ---------------------------- examples/fft3d_bram.c | 134 ---------------------------- examples/fft3d_ddr.c | 138 ----------------------------- examples/fft3d_ddr_batch.c | 139 ----------------------------- examples/fft3d_ddr_svm.c | 149 ------------------------------- examples/fft3d_ddr_svm_batch.c | 154 --------------------------------- examples/fft3d_svm.c | 140 ------------------------------ 14 files changed, 1648 deletions(-) delete mode 100755 examples/common/helper.c delete mode 100755 examples/common/helper.h delete mode 100644 examples/common/verify_fftw.c delete mode 100644 examples/common/verify_fftw.h delete mode 100644 examples/fft1d.c delete mode 100644 examples/fft1d_svm.c delete mode 100644 examples/fft2d.c delete mode 100644 examples/fft2d_bram_svm.c delete mode 100755 examples/fft3d_bram.c delete mode 100755 examples/fft3d_ddr.c delete mode 100755 examples/fft3d_ddr_batch.c delete mode 100755 examples/fft3d_ddr_svm.c delete mode 100755 examples/fft3d_ddr_svm_batch.c delete mode 100644 examples/fft3d_svm.c diff --git a/examples/common/helper.c b/examples/common/helper.c deleted file mode 100755 index cdc0a1e..0000000 --- a/examples/common/helper.c +++ /dev/null @@ -1,152 +0,0 @@ -// Author: Arjun Ramaswami - -#define _POSIX_C_SOURCE 199309L -#include -#include -#include -#include -#include "helper.h" -#include -#include -#include -#define _USE_MATH_DEFINES - -/** - * \brief create random 
single precision complex floating point values - * \param inp : pointer to float2 data of size N - * \param N : number of points in the array - * \return true if successful - */ -bool fftf_create_data(float2 *inp, unsigned num_pts){ - - if(inp == NULL || num_pts <= 0){ - return false; - } - - for(size_t i = 0; i < num_pts; i++){ - inp[i].x = (float)((float)rand() / (float)RAND_MAX); - inp[i].y = (float)((float)rand() / (float)RAND_MAX); - } - - return true; -} - -/** - * \brief create random double precision complex floating point values - * \param inp : pointer to double2 data of size inp_sz - * \param inp_sz : number of points in the array - * \return true if successful - */ -bool fft_create_data(double2 *inp, unsigned num_pts){ - - if(inp == NULL || num_pts <= 0){ - return false; - } - - for(size_t i = 0; i < num_pts; i++){ - inp[i].x = (double)((double)rand() / (double)RAND_MAX); - inp[i].y = (double)((double)rand() / (double)RAND_MAX); - } - - return true; -} - -/** - * \brief print configuration chosen to execute on FPGA - * \param N: fft size - * \param dim: number of dimensions of size - * \param iter: number of iterations of each transformation (if BATCH mode) - * \param inv: true for backward transform - * \param sp: true for single precision floating point transformation - * \param use_bram: true if transpose uses BRAM, not DDR (valid for 2d and 3d FFT) - * \param interleaving: true if data should be interleaved amongst the banks in DDR memory - */ -void print_config(int N, int dim, int iter, bool inv, bool sp, int batch, bool use_bram, bool interleaving){ - printf("\n------------------------------------------\n"); - printf("FFT Configuration: \n"); - printf("--------------------------------------------\n"); - printf("Type = Complex to Complex\n"); - printf("Points = %d%s \n", N, dim == 1 ? "" : dim == 2 ? "^2" : "^3"); - printf("Precision = %s \n", sp ? "Single": "Double"); - printf("Direction = %s \n", inv ? 
"Backward":"Forward"); - printf("Placement = In Place \n"); - printf("Batch = %d \n", batch); - printf("Iterations = %d \n", iter); - printf("Transpose = %s \n", use_bram ? "BRAM":"DDR"); - printf("Interleaving = %s \n", interleaving ? "Yes":"No"); - printf("--------------------------------------------\n\n"); -} - -/** - * \brief print time taken for fpga and fftw runs to a file - * \param total_api_time: time taken to call iter times the host code - * \param timing: kernel execution and pcie transfer timing - * \param N: fft size - * \param dim: number of dimensions of size - * \param iter: number of iterations of each transformation (if BATCH mode) - * \param inv: true if backward transform - * \param single precision floating point transformation - */ -void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec_t, double hw_pcie_rd, double hw_pcie_wr, double hw_exec, int N, int dim, int iter, int batch, bool inv, bool sp){ - - double avg_api_time = 0.0; - - if (total_api_time != 0.0){ - avg_api_time = total_api_time / iter; - } - - double pcie_read = pcie_rd / iter; - double pcie_write = pcie_wr / iter; - double exec = exec_t / iter; - - double hw_pcie_read = hw_pcie_rd / iter; - double hw_pcie_write = hw_pcie_wr / iter; - double hw_execution = hw_exec / iter; - - double gpoints_per_sec = (batch * pow(N, dim)) / (hw_execution * 1e-3 * 1024 * 1024 * 1024); - double gBytes_per_sec = 0.0; - - if(sp){ - gBytes_per_sec = gpoints_per_sec * 8; // bytes - } - else{ - gBytes_per_sec *= gpoints_per_sec * 16; - } - - double gflops = batch * dim * 5 * pow(N, dim) * (log((double)N)/log((double)2))/(exec * 1e-3 * 1024*1024*1024); - - printf("\n\n------------------------------------------\n"); - printf("Measurements \n"); - printf("--------------------------------------------\n"); - printf("Points = %d%s \n", N, dim == 1 ? "" : dim == 2 ? "^2" : "^3"); - printf("Precision = %s\n", sp ? "Single": "Double"); - printf("Direction = %s\n", inv ? 
"Backward":"Forward"); - printf("Iterations = %d\n", iter); - printf("Batch = %d\n", batch); - - printf("%s", iter>1 ? "Average Measurements of iterations\n":""); - printf("PCIe Write = %.4lfms\n", pcie_write); - printf("Kernel Execution = %.4lfms\n", exec); - printf("PCIe Read = %.4lfms\n", pcie_read); - printf("Total = %.4lfms\n", pcie_read + exec + pcie_write); - printf("HW PCIe Write = %.4lfms\n", hw_pcie_write); - printf("HW Kernel Execution = %.4lfms\n", hw_execution); - printf("HW PCIe Read = %.4lfms\n", hw_pcie_read); - printf("Hw Total = %.4lfms\n", hw_pcie_write + hw_execution + hw_pcie_read); - printf("Throughput = %.4lfGFLOPS/s | %.4lf GB/s\n", gflops, gBytes_per_sec); - printf("API runtime = %.4lfms\n", avg_api_time); - -} - -/** - * \brief compute walltime in milliseconds - * \return time in milliseconds - */ -double getTimeinMilliseconds(){ - struct timespec a; - if(clock_gettime(CLOCK_MONOTONIC, &a) != 0){ - fprintf(stderr, "Error in getting wall clock time \n"); - exit(EXIT_FAILURE); - } - return (double)(a.tv_nsec) * 1.0e-6 + (double)(a.tv_sec) * 1.0E3; -} \ No newline at end of file diff --git a/examples/common/helper.h b/examples/common/helper.h deleted file mode 100755 index 206eedc..0000000 --- a/examples/common/helper.h +++ /dev/null @@ -1,18 +0,0 @@ -// Author: Arjun Ramaswami - -#ifndef HELPER_H -#define HELPER_H - -#include -#include "fftfpga/fftfpga.h" - -bool fftf_create_data(float2 *inp, unsigned N); - -bool fft_create_data(double2 *inp, unsigned N); - -void print_config(int N, int dim, int iter, bool inv, bool sp, int batch, bool use_bram, bool interleaving); - -void display_measures(double total_api_time, double pcie_rd, double pcie_wr, double exec, double avg_hw_pcie_rd, double avg_hw_pcie_wr, double avg_hw_exec, int N, int dim, int iter, int batch, bool inv, bool sp); - -double getTimeinMilliseconds(); -#endif // HELPER_H diff --git a/examples/common/verify_fftw.c b/examples/common/verify_fftw.c deleted file mode 100644 index 
9b8823e..0000000 --- a/examples/common/verify_fftw.c +++ /dev/null @@ -1,97 +0,0 @@ -// Author: Arjun Ramaswami -#include -#include -#include -#include -#include "fftfpga/fftfpga.h" - -#ifdef USE_FFTW -#include - -/** - * \brief Verify FFT computed in FPGA with FFTW - * \param fpga_out: pointer to FPGA computation for sp complex data - * \param fftw_data: pointer to FFT sized allocation of sp complex data for fftw cpu computation - * \param N: number of points per dimension of FFT3d - * \param dim: number of dimensions of points - * \param inverse: true if backward FFT - * \param how_many: default is 1 - * \return true if verification passed - */ -bool verify_fftwf(float2 *fpgaout, float2 *verify, int N, int dim, bool inverse, int how_many){ - - // Copy inp data to verify using FFTW - // requires allocating data specifically for FFTW computation - size_t num_pts = how_many * pow(N, dim); - fftwf_complex *fftw_data = fftwf_alloc_complex(num_pts); - - for(size_t i = 0; i < num_pts; i++){ - fftw_data[i][0] = verify[i].x; - fftw_data[i][1] = verify[i].y; - } - - int *n = (int*)calloc(N * dim , sizeof(int)); - for(size_t i = 0; i < dim; i++){ - n[i] = N; - } - - // Compute 3d FFT using FFTW - // Create Plan using simple heuristic and in place FFT - fftwf_plan plan; - //const int n[] = {N, N, N}; - //int idist = N*N*N, odist = N*N*N; - int idist = pow(N, dim); - int odist = pow(N, dim); - int istride = 1, ostride = 1; // contiguous in memory - - if(inverse){ - plan = fftwf_plan_many_dft(dim, n, how_many, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_BACKWARD, FFTW_ESTIMATE); - //plan = fftwf_plan_dft_3d( N, N, N, &fftw_data[0], &fftw_data[0], FFTW_BACKWARD, FFTW_ESTIMATE); - } - else{ - plan = fftwf_plan_many_dft(dim, n, how_many, &fftw_data[0], NULL, istride, idist, fftw_data, NULL, ostride, odist, FFTW_FORWARD, FFTW_ESTIMATE); - //plan = fftwf_plan_dft_3d( N, N, N, &fftw_data[0], &fftw_data[0], FFTW_FORWARD, FFTW_ESTIMATE); - } - - // 
Execute in place FFTW based on plan created - fftwf_execute(plan); - - // verify by calculating signal-to-noise ratio (SNR) - float mag_sum = 0, noise_sum = 0, magnitude, noise; - - for (size_t i = 0; i < num_pts; i++) { - - magnitude = fftw_data[i][0] * fftw_data[i][0] + \ - fftw_data[i][1] * fftw_data[i][1]; - noise = (fftw_data[i][0] - fpgaout[i].x) \ - * (fftw_data[i][0] - fpgaout[i].x) + - (fftw_data[i][1] - fpgaout[i].y) * (fftw_data[i][1] - fpgaout[i].y); - - mag_sum += magnitude; - noise_sum += noise; -#ifndef NDEBUG - //printf("%zu : fpga - (%e %e) cpu - (%e %e)\n", i, fpgaout[i].x, fpgaout[i].y, fftw_data[i][0], fftw_data[i][1]); -#endif - } - - // Calculate SNR - float db = 10 * log(mag_sum / noise_sum) / log(10.0); - - // Free FFTW data - fftwf_free(fftw_data); - - free(n); - // destroy plan - fftwf_destroy_plan(plan); - - // if SNR greater than 120, verification passes - if(db > 120){ - return true; - } - else{ - printf("\tSignal to noise ratio on output sample: %f --> %s\n\n", db, "FAILED"); - return false; - } -} - -#endif // USE_FFTW \ No newline at end of file diff --git a/examples/common/verify_fftw.h b/examples/common/verify_fftw.h deleted file mode 100644 index c31107c..0000000 --- a/examples/common/verify_fftw.h +++ /dev/null @@ -1,10 +0,0 @@ -// Author: Arjun Ramaswami - -#ifndef FFT3D_FFTW_H -#define FFT3D_FFTW_H - -#include - -bool verify_fftwf(float2 *fpgaout, const float2 *verify, int N, int dim, bool inverse, int how_many); - -#endif // FFT3D_FFTW_H \ No newline at end of file diff --git a/examples/fft1d.c b/examples/fft1d.c deleted file mode 100644 index 2ae3e33..0000000 --- a/examples/fft1d.c +++ /dev/null @@ -1,123 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 
64, dim = 1, iter = 1, batch = 1; - - bool use_bram = false, sp = true, inv = false, use_svm = false, interleaving = false; - bool status = true, use_emulator = false; - - char *path = "fft1d_emulate.aocx"; - const char *platform = "Intel(R) FPGA"; - - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_INTEGER('c',"batch", &batch, "Batch"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - return EXIT_FAILURE; - } - - size_t inp_sz = sizeof(float2) * N * batch; - - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - // find the average of iterations of batched 1D FFTs - // random data every iteration and every batch - for(size_t i = 0; i < iter; i++){ - - status = fftf_create_data(inp, N * batch); - if(!status){ - fprintf(stderr, "Error in Data 
Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_1d(N, inp, out, inv, batch); - total_api_time += getTimeinMilliseconds() - temp_timer; - - // TODO: Verification of bit reversed output - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - } - - // destroy FFT input and output - free(inp); - free(out); - - // destroy data - fpga_final(); - - // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - return EXIT_SUCCESS; -} \ No newline at end of file diff --git a/examples/fft1d_svm.c b/examples/fft1d_svm.c deleted file mode 100644 index a761f6e..0000000 --- a/examples/fft1d_svm.c +++ /dev/null @@ -1,121 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 64, dim = 1, iter = 1, batch = 1; - - bool use_bram = false, sp = true, inv = false, use_svm = true, interleaving = false; - bool status = true, use_emulator = false; - - char *path = 
"fft1d_emulate.aocx"; - const char *platform = "Intel(R) FPGA"; - - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_INTEGER('c',"batch", &batch, "Batch"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - return EXIT_FAILURE; - } - - size_t inp_sz = sizeof(float2) * N * batch; - - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - // find the average of iterations of batched 1D FFTs - // random data every iteration and every batch - for(size_t i = 0; i < iter; i++){ - - status = fftf_create_data(inp, N * batch); - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_1d_svm(N, inp, out, inv, batch); - total_api_time += getTimeinMilliseconds() - temp_timer; - - // 
TODO: Verification of bit reversed output - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - - } - // destroy FFT input and output - free(inp); - free(out); - - // destroy data - fpga_final(); - - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - return EXIT_SUCCESS; -} \ No newline at end of file diff --git a/examples/fft2d.c b/examples/fft2d.c deleted file mode 100644 index 88e1e66..0000000 --- a/examples/fft2d.c +++ /dev/null @@ -1,142 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" -#include "verify_fftw.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 64, dim = 2, iter = 1, batch = 1; - - bool use_bram = false, interleaving = false, sp = true, inv = false; - bool status = true, use_emulator = false; - bool use_svm = 0; - - char *path = "fft2d_emulate.aocx"; - const char *platform = "Intel(R) FPGA"; - - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 
0.0, total_api_time = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_BOOLEAN('v',"svm", &use_svm, "Use SVM"), - OPT_INTEGER('c',"batch", &batch, "Batch"), - OPT_BOOLEAN('m',"bram", &use_bram, "Use BRAM"), - OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - return EXIT_FAILURE; - } - - size_t inp_sz = sizeof(float2) * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - for(size_t i = 0; i < iter; i++){ - - status = fftf_create_data(inp, N * N); - if(!status){ - free(inp); - free(out); - return EXIT_FAILURE; - } - - if(use_bram == 1){ - // use bram for 2d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_2d_bram(N, inp, out, inv, interleaving, batch); - total_api_time += getTimeinMilliseconds() - temp_timer; - } - else{ - // use global memory for 2d Transpose - temp_timer = getTimeinMilliseconds(); - timing = 
fftfpgaf_c2c_2d_ddr(N, inp, out, inv); - total_api_time += getTimeinMilliseconds() - temp_timer; - } - -#ifdef USE_FFTW - if(!verify_fftwf(out, inp, N, 2, inv, 1)){ - fprintf(stderr, "2d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } -#endif - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - - } // iter - - // destroy FFT input and output - free(inp); - free(out); - - // destroy fpga state - fpga_final(); - - // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - return EXIT_SUCCESS; -} diff --git a/examples/fft2d_bram_svm.c b/examples/fft2d_bram_svm.c deleted file mode 100644 index 425013d..0000000 --- a/examples/fft2d_bram_svm.c +++ /dev/null @@ -1,131 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" -#include "verify_fftw.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 64, dim = 2, iter = 1, batch = 1, how_many = 1; - - bool use_bram = true, interleaving = false, sp = true, inv = false; - bool status = true, use_emulator = 
false; - bool use_svm = true; - - char *path = "fft2d_emulate.aocx"; - const char *platform = "Intel(R) FPGA"; - - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_INTEGER('m',"how_many", &how_many, "How Many per Call"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, how_many, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - return EXIT_FAILURE; - } - - size_t inp_sz = sizeof(float2) * N * N * how_many; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - for(size_t i = 0; i < iter; i++){ - - status = fftf_create_data(inp, inp_sz); - if(!status){ - free(inp); - free(out); - return EXIT_FAILURE; - } - - // use bram for 2d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_2d_bram_svm(N, inp, out, inv, how_many); - total_api_time += getTimeinMilliseconds() - temp_timer; - 
-#ifdef USE_FFTW - if(!verify_fftwf(out, inp, N, 2, inv, how_many)){ - fprintf(stderr, "2d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } -#endif - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - - } // iter - - // destroy FFT input and output - free(inp); - free(out); - - // destroy fpga state - fpga_final(); - - // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - return EXIT_SUCCESS; -} diff --git a/examples/fft3d_bram.c b/examples/fft3d_bram.c deleted file mode 100755 index c5a3c68..0000000 --- a/examples/fft3d_bram.c +++ /dev/null @@ -1,134 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" -#include "verify_fftw.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, batch = 1; - - bool interleaving = false, use_bram = true, sp = true; - bool use_svm = false, inv = false; - bool status = true, use_emulator = false; - - char *path = "fft3d_emulate.aocx"; - const char *platform; - - fpga_t timing = {0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - fprintf(stderr, "FPGA initialization error\n"); - return EXIT_FAILURE; - } - - size_t inp_sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - for(size_t i = 0; i < iter; i++){ - - // create and destroy data every iteration - status = fftf_create_data(inp, N * N * N); - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - // use bram for 3d Transpose - temp_timer = getTimeinMilliseconds(); - timing = 
fftfpgaf_c2c_3d_bram(N, inp, out, inv, interleaving); - total_api_time += getTimeinMilliseconds() - temp_timer; - -#ifdef USE_FFTW - if(!verify_fftwf(out, inp, N, 3, inv, 1)){ - fprintf(stderr, "3d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } -#endif - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - } // iter - // destroy FFT input and output - free(inp); - free(out); - - // destroy fpga state - fpga_final(); - - // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - return EXIT_SUCCESS; -} diff --git a/examples/fft3d_ddr.c b/examples/fft3d_ddr.c deleted file mode 100755 index 901b6e1..0000000 --- a/examples/fft3d_ddr.c +++ /dev/null @@ -1,138 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" -#include "verify_fftw.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, batch = 1; - - bool inv = false, sp = true; - bool use_bram = false, interleaving = false, use_svm = false; - bool status = true, use_emulator 
= false; - - char *path = "fft3d_emulate.aocx"; - const char *platform; - - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - fprintf(stderr, "FPGA initialization error\n"); - return EXIT_FAILURE; - } - - size_t inp_sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - for(size_t i = 0; i < iter; i++){ - - // create and destroy data every iteration - double data_timer = getTimeinMilliseconds(); - status = fftf_create_data(inp, N * N * N); - data_timer = getTimeinMilliseconds() - data_timer; - if(!status){ - 
fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - printf("Time to Create Data: %lfsec for %uMB\n", data_timer * 1e-3, (N*N*N* 8 / (1024 * 1024))); - - // use ddr for 3d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr(N, inp, out, inv); - total_api_time += getTimeinMilliseconds() - temp_timer; - -#ifdef USE_FFTW - if(!verify_fftwf(out, inp, N, 3, inv, 1)){ - fprintf(stderr, "3d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } -#endif - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - - } // iter - // destroy FFT input and output - free(inp); - free(out); - - // destroy fpga state - fpga_final(); - - // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - return EXIT_SUCCESS; -} diff --git a/examples/fft3d_ddr_batch.c b/examples/fft3d_ddr_batch.c deleted file mode 100755 index a949dfe..0000000 --- a/examples/fft3d_ddr_batch.c +++ /dev/null @@ -1,139 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" -#include "verify_fftw.h" - 
-static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, batch = 1; - - bool inv = false, sp = true; - bool use_bram = false, interleaving = false, use_svm = false; - bool status = true, use_emulator = false; - - char *path = "fft3d_emulate.aocx"; - const char *platform; - - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; - double data_timer = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_INTEGER('c',"batch", &batch, "Batch"), - OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - fprintf(stderr, "FPGA initialization error\n"); - return EXIT_FAILURE; - } - - size_t inp_sz = sizeof(float2) * N * N * N * 
batch; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - for(size_t i = 0; i < iter; i++){ - - // create and destroy data every iteration - data_timer = getTimeinMilliseconds(); - status = fftf_create_data(inp, N * N * N * batch); - data_timer = getTimeinMilliseconds() - data_timer; - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - printf("Time to Create Data: %lfsec for %uMB\n", data_timer * 1e-3, (N*N*N* 8 / (1024 * 1024))); - - // use ddr for 3d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr_batch(N, inp, out, inv, interleaving, batch); - total_api_time += getTimeinMilliseconds() - temp_timer; - -#ifdef USE_FFTW - if(!verify_fftwf(out, inp, N, 3, inv, batch)){ - fprintf(stderr, "3d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } -#endif - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - - } // iter - // destroy FFT input and output - free(inp); - free(out); - - // destroy fpga state - fpga_final(); - - // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - return EXIT_SUCCESS; -} 
diff --git a/examples/fft3d_ddr_svm.c b/examples/fft3d_ddr_svm.c deleted file mode 100755 index 97b4a34..0000000 --- a/examples/fft3d_ddr_svm.c +++ /dev/null @@ -1,149 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" -#include "verify_fftw.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, batch = 1; - - bool interleaving = false, use_bram = false, sp = true, inv = false; - bool status = true, use_emulator = false; - bool use_svm = true; - - char *path = "fft3d_emulate.aocx"; - const char *platform; - - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; - double avg_svm_copyin = 0.0, avg_svm_copyout = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) 
FPGA Emulation Platform for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - fprintf(stderr, "FPGA initialization error\n"); - return EXIT_FAILURE; - } - - // create and destroy data every iteration - size_t inp_sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - for(size_t i = 0; i < iter; i++){ - - double data_timer = getTimeinMilliseconds(); - status = fftf_create_data(inp, N * N * N); - data_timer = getTimeinMilliseconds() - data_timer; - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - printf("Time to Create Data: %lfsec for %uMB\n", data_timer * 1e-3, (N*N*N* 8 / (1024 * 1024))); - - // use ddr for 3d Transpose - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr_svm(N, inp, out, inv, interleaving); - total_api_time += getTimeinMilliseconds() - temp_timer; - -#ifdef USE_FFTW - if(!verify_fftwf(out, inp, N, 3, inv, 1)){ - fprintf(stderr, "3d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } -#endif - if(!timing.valid){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - avg_svm_copyin += timing.svm_copyin_t; - avg_svm_copyout += timing.svm_copyout_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", 
timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - - printf("SVM Memcpy: \n"); - printf("\tHW Copy In: %lfms\n", timing.svm_copyin_t); - printf("\tHW Copy Out: %lfms\n\n", timing.svm_copyout_t); - } // iter - - // destroy FFT input and output - free(inp); - free(out); - - // destroy fpga state - fpga_final(); - - // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - printf("\n"); - printf("SVM Copy In = %.4lfms\n", avg_svm_copyin / iter); - printf("SVM Copy Out = %.4lfms\n", avg_svm_copyout / iter); - - return EXIT_SUCCESS; -} diff --git a/examples/fft3d_ddr_svm_batch.c b/examples/fft3d_ddr_svm_batch.c deleted file mode 100755 index 639292d..0000000 --- a/examples/fft3d_ddr_svm_batch.c +++ /dev/null @@ -1,154 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" -#include "verify_fftw.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int N = 64, dim = 3, iter = 1, batch = 1; - - bool inv = false, sp = true, use_bram = false, interleaving = false; - bool status = true, use_emulator = false; - bool use_svm = true; - - char *path = "fft3d_emulate.aocx"; - const char *platform; - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.0, false}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double avg_svm_copyin = 0.0, avg_svm_copyout = 0.0; - - bool noverify = false; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - 
OPT_INTEGER('c',"batch", &batch, "Batch"), - OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_BOOLEAN('y', "noverify", &noverify, "Don't verify results"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - fprintf(stderr, "FPGA initialization error\n"); - return EXIT_FAILURE; - } - - double total_api_time = 0.0; - - // create and destroy data every iteration - size_t inp_sz = sizeof(float2) * N * N * N * batch; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - unsigned num_pts = N*N*N * batch; - - for(size_t i = 0; i < iter; i++){ - - status = fftf_create_data(inp, num_pts); - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - // use ddr for 3d Transpose - double temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr_svm_batch(N, inp, out, inv, batch); - total_api_time += getTimeinMilliseconds() - temp_timer; - -#ifdef USE_FFTW - if(noverify == false){ - printf("Verifying results for iteration %lu\n", i); - if(!verify_fftwf(out, inp, N, 3, inv, batch)){ - fprintf(stderr, "3d FFT 
Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - } -#endif - if(!timing.valid){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - avg_svm_copyin += timing.svm_copyin_t; - avg_svm_copyout += timing.svm_copyout_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - - printf("SVM Memcpy: \n"); - printf("\tHW Copy In: %lfms\n", timing.svm_copyin_t); - printf("\tHW Copy Out: %lfms\n\n", timing.svm_copyout_t); - } // iter - - // destroy FFT input and output - free(inp); - free(out); - - // destroy fpga state - fpga_final(); - - // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - printf("\n"); - printf("SVM Copy In = %.4lfms\n", avg_svm_copyin / iter); - printf("SVM Copy Out = %.4lfms\n", avg_svm_copyout / iter); - - return EXIT_SUCCESS; -} diff --git a/examples/fft3d_svm.c b/examples/fft3d_svm.c deleted file mode 100644 index 89d7696..0000000 --- a/examples/fft3d_svm.c +++ /dev/null @@ -1,140 +0,0 @@ -// Author: Arjun Ramaswami - -#include -#include // EXIT_FAILURE -#include -#include - -#include "CL/opencl.h" -#include "fftfpga/fftfpga.h" - -#include "argparse.h" -#include "helper.h" -#include "verify_fftw.h" - -static const char *const usage[] = { - "bin/host [options]", - NULL, -}; - -int main(int argc, const char **argv) { - int 
N = 64, dim = 3, iter = 1, batch = 1; - - bool interleaving = false, use_bram = false, sp = true, inv = false; - bool status = true, use_emulator = false; - bool use_svm = true; - - char *path = "fft3d_emulate.aocx"; - const char *platform; - - fpga_t timing = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0}; - double avg_rd = 0.0, avg_wr = 0.0, avg_exec = 0.0; - double avg_hw_rd = 0.0, avg_hw_wr = 0.0, avg_hw_exec = 0.0; - double temp_timer = 0.0, total_api_time = 0.0; - - struct argparse_option options[] = { - OPT_HELP(), - OPT_GROUP("Basic Options"), - OPT_INTEGER('n',"n", &N, "FFT Points"), - OPT_BOOLEAN('s',"sp", &sp, "Single Precision"), - OPT_INTEGER('i',"iter", &iter, "Iterations"), - OPT_BOOLEAN('b',"back", &inv, "Backward FFT"), - OPT_BOOLEAN('t',"interleaving", &interleaving, "Use burst interleaving in case of BRAM designs"), - OPT_STRING('p', "path", &path, "Path to bitstream"), - OPT_BOOLEAN('e', "emu", &use_emulator, "Use emulator"), - OPT_END(), - }; - - struct argparse argparse; - argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, "Computing FFT using FPGA", "FFT size and dimensions are mandatory, default dimension and number of iterations are 1"); - argc = argparse_parse(&argparse, argc, argv); - - // Print to console the configuration chosen to execute during runtime - print_config(N, dim, iter, inv, sp, batch, use_bram, interleaving); - - if(use_emulator){ - platform = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - else{ - platform = "Intel(R) FPGA SDK for OpenCL(TM)"; - //platform = "Intel(R) FPGA"; - } - - int isInit = fpga_initialize(platform, path, use_svm); - if(isInit != 0){ - fprintf(stderr, "FPGA initialization error\n"); - return EXIT_FAILURE; - } - - if(sp == 0){ - printf("Not implemented. 
Work in Progress\n"); - return EXIT_SUCCESS; - } - else{ - // create and destroy data every iteration - size_t inp_sz = sizeof(float2) * N * N * N; - float2 *inp = (float2*)fftfpgaf_complex_malloc(inp_sz); - float2 *out = (float2*)fftfpgaf_complex_malloc(inp_sz); - - for(size_t i = 0; i < iter; i++){ - status = fftf_create_data(inp, N * N * N); - if(!status){ - fprintf(stderr, "Error in Data Creation \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - temp_timer = getTimeinMilliseconds(); - timing = fftfpgaf_c2c_3d_ddr_svm(N, inp, out, inv); - total_api_time += getTimeinMilliseconds() - temp_timer; - -#ifdef USE_FFTW - if(!verify_sp_fft3d_fftw(out, inp, N, inv, 1)){ - fprintf(stderr, "3d FFT Verification Failed \n"); - free(inp); - free(out); - return EXIT_FAILURE; - } -#endif - if(timing.valid == 0){ - fprintf(stderr, "Invalid execution, timing found to be 0"); - free(inp); - free(out); - return EXIT_FAILURE; - } - - avg_rd += timing.pcie_read_t; - avg_wr += timing.pcie_write_t; - avg_exec += timing.exec_t; - avg_hw_rd += timing.hw_pcie_read_t; - avg_hw_wr += timing.hw_pcie_write_t; - avg_hw_exec += timing.hw_exec_t; - - printf("Iter: %lu\n", i); - printf("\tPCIe Rd: %lfms\n", timing.pcie_read_t); - printf("\tKernel: %lfms\n", timing.exec_t); - printf("\tPCIe Wr: %lfms\n\n", timing.pcie_write_t); - - printf("Hw Counters: \n"); - printf("\tHW PCIe Rd: %lfms\n", timing.hw_pcie_read_t); - printf("\tHW Kernel: %lfms\n", timing.hw_exec_t); - printf("\tHW PCIe Wr: %lfms\n\n", timing.hw_pcie_write_t); - - } // iter - - // destroy FFT input and output - free(inp); - free(out); - } // sp condition - - // destroy fpga state - fpga_final(); - - // display performance measures - display_measures(total_api_time, avg_rd, avg_wr, avg_exec, avg_hw_rd, avg_hw_wr, avg_hw_exec, N, dim, iter, batch, inv, sp); - - return EXIT_SUCCESS; -} From 81fe6c16c05ec058283d5cc045120f3dd65c6ec2 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:16:30 +0200 
Subject: [PATCH 65/76] fixed emulation dependency in test --- tests/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3f3925d..6a20d2a 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -30,11 +30,11 @@ else() message(WARNING, "FFTW library not found. Cannot perform correctness tests!") endif() -add_dependencies(test_fftfpga fft3d_bram_emu) -add_dependencies(test_fftfpga fft3d_ddr_emu) -add_dependencies(test_fftfpga fft2d_bram_emu) -add_dependencies(test_fftfpga fft2d_ddr_emu) -add_dependencies(test_fftfpga fft1d_emu) +add_dependencies(test_fftfpga fft3d_bram_emulate) +add_dependencies(test_fftfpga fft3d_ddr_emulate) +add_dependencies(test_fftfpga fft2d_bram_emulate) +add_dependencies(test_fftfpga fft2d_ddr_emulate) +add_dependencies(test_fftfpga fft1d_emulate) add_test( NAME test From 6a1ddd4d35b43f9ed7d6924eb2a720af7c3dfde5 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:16:47 +0200 Subject: [PATCH 66/76] draft userguide --- docs/userguide.md | 109 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 docs/userguide.md diff --git a/docs/userguide.md b/docs/userguide.md new file mode 100644 index 0000000..d889a38 --- /dev/null +++ b/docs/userguide.md @@ -0,0 +1,109 @@ +# User Guide + +## Repository Structure + +- `api` : host code to setup and execute FPGA bitstreams. 
Compiled to a static library that can be linked to your application +- `kernels` : OpenCL kernel code for 1d, 2d and 3d FFT +- `examples`: Sample code that makes use of the api +- `cmake` : cmake modules used by the build system +- `scripts`: convenience slurm scripts +- `docs` : describes models regarding performance and resource utilization + +## Build System + +### External Libraries + +These additional libraries are automatically fetched during system configuration: + +- [cxxopts](https://github.com/jarro2783/cxxopts) for command line argument parsing +- [hlslib](https://github.com/definelicht/hlslib) for CMake Intel FPGA OpenCL find packages +- [findFFTW](https://github.com/egpbos/findFFTW.git) for CMake FFTW find package +- [gtest](https://github.com/google/googletest.git) for unit tests + +### List of Kernels + +| | Kernel Name | Description | +| :-- | :---------- | :---------------------------------- | +| 1D | fft1d | OpenCL design provided by Intel | +| 2D | fft2d\_ddr | DDR memory is used for 2D Transpose | +| | fft2d\_bram | BRAM is used for 2D Transpose | +| 3D | fft3d\_ddr | DDR memory is used for 3D Transpose | +| | fft3d\_bram | BRAM is used for 3D Transpose | + +These kernels can be built by appending `_emulate` or `_syn` to the kernel name, e.g. `fft1d_emulate`. + +### Additional Kernel Builds + +Generation of aocl reports + +```bash +make _report +make fft1d_report +``` + +## Compile Definitions + +Set these using ccmake or by passing them to cmake with `-D`: + +- `LOG_FFT_SIZE`: set the log of the length of the matrix. Example: `-DLOG_FFT_SIZE=6`. + +## Enabling Shared Virtual Memory Extensions (SVM) + +Currently tested only on the pacd5005 board. The board specification requires the following attributes to be set on global memory accesses, so they are set automatically. Otherwise, they can be set using the variable names. 
+ + + +## Runtime Input Parameters + +```bash + -h, --help show this help message and exit + +Basic Options + -n, --n= FFT Points + -s, --sp Single Precision + -i, --iter= Iterations + -b, --back Backward FFT + -v, --svm Use SVM + -m, --bram Use BRAM + -p, --path= Path to bitstream +``` + +## Output Interpretation + +The examples measure and output the relevant performance metrics shown below: + +```bash +------------------------------------------ +FFT Configuration: +-------------------------------------------- +Type = Complex to Complex +Points = 64 +Precision = Single +Direction = Forward +Placement = In Place +Iterations = 1 +-------------------------------------------- + + Initializing FPGA ... + Getting program binary from path emu_64_fft3d_bram/fft3d_bram.aocx ... + Building program ... + FFT kernel initialization is complete. + Cleaning up FPGA resources ... + +------------------------------------------ +Measurements +-------------------------------------------- +Points = 64 +Precision = Single +Direction = Forward +PCIe Write = 0.03ms +Kernel Execution = 0.48ms +PCIe Read = 0.02ms +Throughput = 0.00GFLOPS/s | 0.00 GB/s +``` + +- `PCIe Write` and `PCIe Read` represent the time taken in milliseconds to transfer data between the host and global memory over the PCIe bus. + +- `Kernel Execution` represents the time taken in milliseconds to execute the OpenCL implementation, including its global memory accesses. + + From 0ae48a0c25c66d25d715c2d6b25e94aabff08c74 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sat, 16 Oct 2021 18:18:00 +0200 Subject: [PATCH 67/76] fixed gitlab ci --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ed6e797..1ac227b 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,6 +10,7 @@ build-all: script: - rm -rf build - mkdir -p build && cd build + - srun -A pc2-mitarbeiter -p fpga --constraint=emul --pty bash - cmake -DLOG_FFT_SIZE=6 -DCMAKE_BUILD_TYPE=Release .. 
- make - chmod +x bin/fft From e49803cb44606eb9c81f610b6ce7e3842a87f661 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sun, 17 Oct 2021 14:33:48 +0200 Subject: [PATCH 68/76] added power scripts --- .gitignore | 3 +- scripts/fft3d_ddr.sh | 15 - scripts/power_measure/parse_data_PACD5005.py | 11 + scripts/power_measure/power_520N.sh | 17 + scripts/power_measure/power_PACD5005.sh | 3896 ++++++++++++++++++ 5 files changed, 3926 insertions(+), 16 deletions(-) delete mode 100755 scripts/fft3d_ddr.sh create mode 100644 scripts/power_measure/parse_data_PACD5005.py create mode 100755 scripts/power_measure/power_520N.sh create mode 100755 scripts/power_measure/power_PACD5005.sh diff --git a/.gitignore b/.gitignore index 96087d3..eb54dd1 100755 --- a/.gitignore +++ b/.gitignore @@ -5,10 +5,11 @@ bin/ fpgabitstream/ reports/ vscode/ -scripts/ +scripts/slurm build* svm_build/ debug* +test_* tags *.DS_Store diff --git a/scripts/fft3d_ddr.sh b/scripts/fft3d_ddr.sh deleted file mode 100755 index 0340aae..0000000 --- a/scripts/fft3d_ddr.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -#SBATCH -A pc2-mitarbeiter -#SBATCH -J 3dfftsyn -#SBATCH -p fpgasyn -#SBATCH --mem=90000MB -#SBATCH --time=24:00:00 - -module load intelFPGA_pro/20.1.0 nalla_pcie/19.4.0_hpc -module load numlib/FFTW - -cd ../build - -cmake -DLOG_FFT_SIZE=5 .. 
-make -make fft3d_ddr_triv_syn diff --git a/scripts/power_measure/parse_data_PACD5005.py b/scripts/power_measure/parse_data_PACD5005.py new file mode 100644 index 0000000..402aff5 --- /dev/null +++ b/scripts/power_measure/parse_data_PACD5005.py @@ -0,0 +1,11 @@ +import pandas as pd + +df = pd.read_csv("powermeasure.csv", header=None, names=['12v_back_a', + '12v_back_v','12v_aux_a', '12v_aux_v'], usecols=[0,1,12,13]) + +df["Sum"] = (df["12v_back_v"] * df["12v_back_a"]) + (df["12v_aux_v"] * + df["12v_aux_a"]) + +print(df.head()) +print("Average:", df["Sum"].mean()) +print("Max:", df["Sum"].max()) diff --git a/scripts/power_measure/power_520N.sh b/scripts/power_measure/power_520N.sh new file mode 100755 index 0000000..11e518d --- /dev/null +++ b/scripts/power_measure/power_520N.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +LOGFILE=powermeasure.csv + +echo "" > $LOGFILE + + +# Start the benchmark + +$@ & + +bm_pid=$! +# Start power measurements + +while $(kill -0 $bm_pid); do + echo $(/usr/share/nallatech/520n/bist/utilities/nalla_serial_cardmon/bin/nalla_serial_cardmon | grep "Total board power (W):" | sed -r 's/.*: ([0-9]+)\.([0-9]+).*/\1.\2/g') >> $LOGFILE +done diff --git a/scripts/power_measure/power_PACD5005.sh b/scripts/power_measure/power_PACD5005.sh new file mode 100755 index 0000000..8a5dc55 --- /dev/null +++ b/scripts/power_measure/power_PACD5005.sh @@ -0,0 +1,3896 @@ + +2.88,12.15,1.80,8.06,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.70,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.06,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.06,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.76,12.17,3.17,0.90 
+2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.50,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.16,0.90 
+2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.23,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 
+2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.14,2.66,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.14,2.66,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.14,2.66,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.66,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.66,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.66,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 
+2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.11,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.76,12.17,3.13,0.90 
+2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.13,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.17,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.17,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.17,0.90 
+2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.17,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.17,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.17,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.17,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.17,0.90 +2.86,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.17,0.90 +2.86,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.17,0.90 +2.86,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 
+2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.50,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.50,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.17,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.17,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.17,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.17,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 
+2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.18,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.18,3.14,0.90 
+2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.11,2.63,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.11,2.63,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.11,2.63,2.74,12.17,3.10,0.90 
+2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.11,2.63,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.63,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.63,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 
+2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.70,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.70,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.28,2.61,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.28,2.61,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.28,2.61,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.28,2.61,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.61,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.61,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.61,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.74,12.17,3.12,0.90 +2.88,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.74,12.17,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.74,12.17,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.74,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 
+2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.70,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.60,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.60,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.71,0.88,29.60,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.15,2.65,2.76,12.18,3.12,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.15,2.65,2.76,12.18,3.15,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.15,2.65,2.76,12.18,3.15,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.15,2.65,2.76,12.18,3.15,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.18,3.15,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.18,3.15,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.18,3.15,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.76,12.18,3.15,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.76,12.18,3.15,0.90 +2.86,12.14,1.80,8.26,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.76,12.18,3.15,0.90 
+2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.76,12.18,3.15,0.90 +2.86,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.21,2.64,2.78,12.18,3.15,0.90 
+2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.21,2.64,2.78,12.18,3.10,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.21,2.64,2.78,12.18,3.10,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.64,2.78,12.18,3.10,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.64,2.78,12.18,3.10,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.64,2.78,12.18,3.10,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.78,12.18,3.10,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.78,12.18,3.10,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.78,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.78,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.78,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.78,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.78,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 
+2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 
+2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.61,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.61,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.61,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.25,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.76,12.17,3.10,0.90 
+2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.76,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.22,2.60,2.74,12.18,3.10,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.22,2.60,2.74,12.18,3.15,0.90 
+2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.22,2.60,2.74,12.18,3.15,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.22,2.60,2.74,12.18,3.15,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.60,2.74,12.18,3.15,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.60,2.74,12.18,3.15,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.74,12.18,3.15,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.74,12.18,3.15,0.90 +2.86,12.14,1.80,8.12,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.74,12.18,3.15,0.90 +2.86,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.74,12.18,3.15,0.90 +2.86,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.74,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 
+2.88,12.14,1.80,8.14,3.30,3.69,0.88,29.70,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.24,2.64,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.24,2.64,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.24,2.64,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.64,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.64,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.64,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.14,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 
+2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,36.80,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.60,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.60,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.76,12.17,3.07,0.90 
+2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.76,12.17,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.76,12.17,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.50,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.16,2.63,2.74,12.18,3.07,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.16,2.63,2.74,12.18,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.16,2.63,2.74,12.18,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.16,2.63,2.74,12.18,3.15,0.90 
+2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.86,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 
+2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.20,2.61,2.74,12.18,3.15,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.20,2.61,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.20,2.61,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.20,2.61,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.61,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.61,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.61,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 
+2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,36.80,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,36.80,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.10,2.60,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.10,2.60,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.10,2.60,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.10,2.60,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.60,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.60,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.20,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 
+2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,36.80,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.73,0.88,29.60,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.28,2.68,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.28,2.68,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.28,2.68,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.28,2.68,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.68,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.68,2.76,12.18,3.17,0.90 
+2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.68,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.76,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 
+2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.17,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.17,2.65,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 
+2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.63,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.63,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.63,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.63,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.63,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.63,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,2.76,12.17,3.10,0.90 +2.86,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,2.76,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,2.76,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,2.76,12.17,3.10,0.90 
+3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.10,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.14,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.67,3.35,12.17,3.14,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.67,3.35,12.17,3.14,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.67,3.35,12.17,3.14,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.67,3.35,12.17,3.14,0.90 
+3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,3.35,12.17,3.14,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,3.35,12.17,3.14,0.90 +3.14,12.14,1.80,8.65,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,3.35,12.17,3.14,0.90 +3.14,12.14,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,3.35,12.17,3.14,0.90 +3.14,12.14,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,3.35,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,3.35,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,3.35,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 
+2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.61,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.20,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 
+2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.70,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.60,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.78,0.88,29.60,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.17,2.66,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.17,2.66,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.17,2.66,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.17,2.66,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.66,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.66,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.15,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.13,0.90 
+2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,29.60,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,39.20,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,39.20,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,3.75,0.88,39.20,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.25,2.64,2.76,12.17,3.34,0.90 +2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.25,2.64,2.76,12.17,3.34,0.90 +2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 +2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 +2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 +2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 
+2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 +2.86,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 +2.86,12.15,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,4.04,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 
+2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.34,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.18,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 
+2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.60,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 
+2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.61,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.76,12.18,3.09,0.90 
+2.88,12.14,1.80,8.28,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.76,12.18,3.09,0.90 +2.88,12.14,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.60,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.24,2.63,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.24,2.63,2.74,12.17,3.08,0.90 
+2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.24,2.63,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.24,2.63,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.63,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.63,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.63,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.25,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.86,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 
+2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.50,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.50,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.50,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.61,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.61,2.74,12.17,3.15,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.61,2.74,12.17,3.15,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.61,2.74,12.17,3.15,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.61,2.74,12.17,3.15,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.61,2.74,12.17,3.15,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.88,12.14,1.80,8.20,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.88,12.14,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 
+2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.65,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.14,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.86,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 
+2.86,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.15,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.06,0.90 
+2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.74,12.17,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 
+3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.13,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.13,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.14,2.62,2.76,12.18,3.13,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.62,2.76,12.18,3.13,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.62,2.76,12.18,3.13,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.62,2.76,12.18,3.13,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.76,12.18,3.13,0.90 +3.04,12.14,1.80,8.32,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.76,12.18,3.13,0.90 +3.04,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.76,12.18,3.13,0.90 +3.04,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.76,12.18,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.76,12.18,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.76,12.18,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.76,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 
+2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.13,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.06,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.06,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.61,2.78,12.17,3.06,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.61,2.78,12.17,3.06,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.61,2.78,12.17,3.06,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.61,2.78,12.17,3.06,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.78,12.17,3.06,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.78,12.17,3.06,0.90 +2.86,12.15,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.78,12.17,3.06,0.90 +2.86,12.15,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.78,12.17,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.78,12.17,3.06,0.90 
+2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.78,12.17,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.78,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.06,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.12,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.12,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.23,2.62,2.74,12.18,3.12,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.18,3.12,0.90 
+2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.18,3.12,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.18,3.12,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.18,3.12,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.18,3.12,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.18,3.12,0.90 +2.86,12.14,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.18,3.12,0.90 +2.86,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,29.60,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,39.20,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.77,0.88,39.20,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 
+2.88,12.15,1.80,8.18,3.30,3.77,0.88,39.20,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.20,2.60,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.20,2.60,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.20,2.60,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.20,2.60,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.60,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.60,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.18,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.76,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 
+2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,39.20,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,29.60,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,29.60,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,4.04,0.88,29.60,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.64,2.74,12.17,3.38,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.64,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.64,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.64,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.64,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 
+2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.60,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.70,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.70,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.66,0.88,29.70,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.59,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.59,2.74,12.18,3.18,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.59,2.74,12.18,3.18,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.59,2.74,12.18,3.18,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.59,2.74,12.18,3.18,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.59,2.74,12.18,3.18,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.59,2.74,12.18,3.18,0.90 
+2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.74,12.18,3.18,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.74,12.18,3.18,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.74,12.18,3.18,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.74,12.18,3.18,0.90 +2.88,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.74,12.18,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.74,12.18,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.74,12.18,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 
+2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.18,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.17,3.07,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.17,3.07,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,2.76,12.17,3.07,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,2.76,12.17,3.07,0.90 +2.86,12.14,1.80,8.12,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,2.76,12.17,3.07,0.90 +2.86,12.14,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,2.76,12.17,3.07,0.90 +2.86,12.14,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,2.76,12.17,3.07,0.90 +2.86,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,2.76,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,2.76,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,2.76,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 
+3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.07,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.12,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.16,2.62,3.51,12.17,3.12,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.62,3.51,12.17,3.12,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.62,3.51,12.17,3.12,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.62,3.51,12.17,3.12,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,3.51,12.17,3.12,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,3.51,12.17,3.12,0.90 +3.14,12.15,1.80,8.10,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,3.51,12.17,3.12,0.90 +3.14,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,3.51,12.17,3.12,0.90 +3.14,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,3.51,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,3.51,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,3.51,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 
+2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.74,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.30,2.66,2.76,12.17,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.66,2.76,12.17,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.66,2.76,12.17,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.17,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.17,3.17,0.90 
+2.86,12.15,1.80,8.12,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.17,3.17,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.17,3.17,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.17,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.17,0.90 
+2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.61,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.61,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.61,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 
+2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.14,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.46,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.46,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.46,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.63,2.74,12.18,3.46,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.63,2.74,12.18,3.46,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.74,12.18,3.46,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.74,12.18,3.46,0.90 +2.86,12.15,1.80,8.20,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.74,12.18,3.46,0.90 +2.86,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.74,12.18,3.46,0.90 +2.86,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 
+2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.70,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.60,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.74,0.88,29.60,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.65,2.76,12.18,3.46,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.65,2.76,12.18,3.20,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.65,2.76,12.18,3.20,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.65,2.76,12.18,3.20,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.65,2.76,12.18,3.20,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.65,2.76,12.18,3.20,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.88,12.15,1.80,8.39,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.88,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 
+2.88,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.19,2.64,2.76,12.18,3.20,0.90 +2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.19,2.64,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.19,2.64,2.76,12.18,3.10,0.90 
+2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.64,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.64,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.64,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.70,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 
+2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.20,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.58,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.18,3.10,0.90 +2.86,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 
+2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,36.80,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,36.80,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.73,0.88,36.80,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.21,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.21,2.63,2.74,12.18,3.14,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.21,2.63,2.74,12.18,3.14,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.21,2.63,2.74,12.18,3.14,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.63,2.74,12.18,3.14,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.63,2.74,12.18,3.14,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.74,12.18,3.14,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.74,12.18,3.14,0.90 +2.88,12.15,1.80,8.26,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.74,12.18,3.14,0.90 +2.88,12.15,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.74,12.18,3.14,0.90 +2.88,12.15,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.74,12.18,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.74,12.18,3.14,0.90 
+2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.74,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,36.80,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,29.60,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,29.60,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,4.03,0.88,29.60,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.64,2.76,12.17,3.14,0.90 +2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.64,2.76,12.17,3.08,0.90 +2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.64,2.76,12.17,3.08,0.90 +2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.64,2.76,12.17,3.08,0.90 +2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.17,3.08,0.90 +2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.17,3.08,0.90 
+2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.86,12.14,1.80,8.10,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.86,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.86,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.50,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.66,2.76,12.17,3.08,0.90 
+2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.66,2.76,12.17,3.18,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.66,2.76,12.17,3.18,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.66,2.76,12.17,3.18,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.66,2.76,12.17,3.18,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.66,2.76,12.17,3.18,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.66,2.76,12.17,3.18,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.18,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.18,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.18,0.90 +2.88,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.76,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 
+2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.60,2.72,12.17,3.18,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.60,2.72,12.17,3.10,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.60,2.72,12.17,3.10,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.60,2.72,12.17,3.10,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.72,12.17,3.10,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.72,12.17,3.10,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.72,12.17,3.10,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.72,12.17,3.10,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.72,12.17,3.10,0.90 +2.86,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.72,12.17,3.10,0.90 +2.86,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.72,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.72,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.72,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 
+3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.14,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.60,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,3.51,12.17,3.10,0.90 +3.14,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,3.51,12.17,3.10,0.90 
+3.14,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,3.51,12.17,3.10,0.90 +3.14,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,3.51,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,3.51,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,3.51,12.17,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.71,0.88,29.70,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.22,2.67,2.74,12.18,3.10,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.22,2.67,2.74,12.18,3.11,0.90 
+2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.22,2.67,2.74,12.18,3.11,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.22,2.67,2.74,12.18,3.11,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.67,2.74,12.18,3.11,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.67,2.74,12.18,3.11,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.86,12.15,1.80,8.20,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.86,12.15,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.86,12.15,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 
+2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.09,2.63,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.17,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.18,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 
+2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.67,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.67,2.78,12.17,3.46,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.67,2.78,12.17,3.46,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.67,2.78,12.17,3.46,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.67,2.78,12.17,3.46,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.78,12.17,3.46,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.78,12.17,3.46,0.90 +2.88,12.14,1.80,8.14,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.78,12.17,3.46,0.90 +2.88,12.14,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.78,12.17,3.46,0.90 +2.88,12.14,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.78,12.17,3.46,0.90 
+2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.78,12.17,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.78,12.17,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.73,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.46,0.90 +2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.12,2.66,2.74,12.18,3.09,0.90 
+2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.66,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.66,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.29,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.18,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.18,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 
+2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.09,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.64,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.14,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.17,3.11,0.90 +2.86,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 
+2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.08,0.90 
+2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,29.60,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,36.70,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,36.70,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,3.67,0.88,36.70,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.18,2.60,2.78,12.18,3.08,0.90 +2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.18,2.60,2.78,12.18,3.06,0.90 +2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.18,2.60,2.78,12.18,3.06,0.90 +2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.18,2.60,2.78,12.18,3.06,0.90 +2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.60,2.78,12.18,3.06,0.90 +2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.60,2.78,12.18,3.06,0.90 
+2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.60,2.78,12.18,3.06,0.90 +2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.78,12.18,3.06,0.90 +2.86,12.15,1.80,8.23,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.78,12.18,3.06,0.90 +2.86,12.15,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.78,12.18,3.06,0.90 +2.86,12.15,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.78,12.18,3.06,0.90 +2.86,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.78,12.18,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.78,12.18,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.78,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,36.70,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,29.60,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,29.60,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,4.02,0.88,29.60,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 
+2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.13,2.58,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.13,2.58,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.13,2.58,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.13,2.58,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.58,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.58,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.58,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.17,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 
+2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.64,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.07,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.12,2.61,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.61,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.12,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.18,3.09,0.90 +2.86,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.18,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.18,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.18,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 
+2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.50,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.50,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.61,0.88,29.50,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.17,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.58,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.58,2.76,12.17,3.09,0.90 
+2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.20,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.90,12.15,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +2.90,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,2.76,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.50,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.70,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.70,0.88,29.70,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 
+3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.14,2.61,3.51,12.17,3.09,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.14,2.61,3.51,12.17,3.14,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.14,2.61,3.51,12.17,3.14,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.14,2.61,3.51,12.17,3.14,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.61,3.51,12.17,3.14,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.61,3.51,12.17,3.14,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,3.51,12.17,3.14,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,3.51,12.17,3.14,0.90 +3.12,12.14,1.80,8.12,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,3.51,12.17,3.14,0.90 +3.12,12.14,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,3.51,12.17,3.14,0.90 +3.12,12.14,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,3.51,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,3.51,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,3.51,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,3.51,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 
+2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.70,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.61,0.88,29.60,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.17,2.67,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.17,2.67,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.17,2.67,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.67,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.67,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.67,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.17,3.11,0.90 +2.88,12.15,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 
+2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.62,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.11,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.63,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.63,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 
+2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.17,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.17,3.15,0.90 
+2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.60,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.60,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.60,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.21,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.17,3.15,0.90 +2.88,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.17,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.17,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.17,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 
+3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.15,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.04,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.13,2.61,2.76,12.18,3.04,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.61,2.76,12.18,3.04,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.61,2.76,12.18,3.04,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.61,2.76,12.18,3.04,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +3.04,12.14,1.80,8.29,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +3.04,12.14,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +3.04,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 
+2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.60,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.67,0.88,29.60,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.26,2.58,2.76,12.18,3.04,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.26,2.58,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.26,2.58,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.26,2.58,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.58,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.58,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.76,12.18,3.14,0.90 +2.86,12.15,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.76,12.18,3.14,0.90 +2.86,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.76,12.18,3.14,0.90 
+2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.76,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.65,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.14,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.27,2.62,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 
+2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.14,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.74,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 
+2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.11,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.06,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.06,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.06,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.06,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.18,3.06,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.18,3.06,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.18,3.06,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.18,3.06,0.90 +2.88,12.15,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.18,3.06,0.90 +2.88,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.18,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,2.76,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 
+3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.71,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.06,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.07,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.21,2.60,3.35,12.17,3.07,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.60,3.35,12.17,3.07,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.60,3.35,12.17,3.07,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.60,3.35,12.17,3.07,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,3.35,12.17,3.07,0.90 +3.04,12.14,1.80,8.64,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,3.35,12.17,3.07,0.90 +3.04,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,3.35,12.17,3.07,0.90 +3.04,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,3.35,12.17,3.07,0.90 +3.04,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,3.35,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,3.35,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,3.35,12.17,3.07,0.90 
+2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.12,2.59,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.12,2.59,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.12,2.59,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 
+2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.76,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.60,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.70,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.70,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.62,0.88,29.70,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 
+2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.18,2.59,2.74,12.17,3.09,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.18,2.59,2.74,12.17,3.07,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.18,2.59,2.74,12.17,3.07,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.59,2.74,12.17,3.07,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.59,2.74,12.17,3.07,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.59,2.74,12.17,3.07,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.74,12.17,3.07,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.74,12.17,3.07,0.90 +2.86,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.74,12.17,3.07,0.90 +2.86,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.74,12.17,3.07,0.90 +2.86,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 
+2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.66,0.88,29.60,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.08,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.63,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,2.76,12.17,3.07,0.90 +2.88,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,2.76,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,2.76,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,2.76,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 
+3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.60,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.70,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.70,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.64,0.88,29.70,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.18,2.57,3.53,12.17,3.07,0.90 +3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.18,2.57,3.53,12.17,3.10,0.90 +3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.18,2.57,3.53,12.17,3.10,0.90 +3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.18,2.57,3.53,12.17,3.10,0.90 +3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.57,3.53,12.17,3.10,0.90 +3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.57,3.53,12.17,3.10,0.90 +3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,3.53,12.17,3.10,0.90 +3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,3.53,12.17,3.10,0.90 
+3.14,12.14,1.80,8.17,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,3.53,12.17,3.10,0.90 +3.14,12.14,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,3.53,12.17,3.10,0.90 +3.14,12.14,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,3.53,12.17,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,3.53,12.17,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,3.53,12.17,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,3.53,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.60,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.10,0.90 
+2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.21,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.21,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.16,2.63,2.74,12.18,3.21,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.21,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.18,3.21,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.74,12.18,3.21,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.74,12.18,3.21,0.90 +2.88,12.15,1.80,8.26,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.74,12.18,3.21,0.90 +2.88,12.15,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.74,12.18,3.21,0.90 +2.88,12.15,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.74,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.74,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.74,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.74,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 
+2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.75,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.21,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.18,3.08,0.90 +2.88,12.14,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.18,3.08,0.90 +2.88,12.14,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.18,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.18,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.18,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 
+2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.61,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.20,2.57,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.57,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.14,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.76,12.17,3.18,0.90 
+2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.76,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.17,3.18,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.17,3.06,0.90 
+2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.63,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.63,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.63,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.63,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.74,12.17,3.06,0.90 +2.88,12.15,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.74,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.74,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.74,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 
+2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.19,2.59,2.76,12.17,3.06,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.19,2.59,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.19,2.59,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.59,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.59,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.59,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.76,12.17,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.76,12.17,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.76,12.17,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 
+2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.70,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.66,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.13,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.20,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.15,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.74,12.18,3.08,0.90 
+2.90,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.74,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.51,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.13,0.90 
+2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.19,2.64,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.64,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.64,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 
+2.88,12.15,1.80,8.10,3.30,3.69,0.88,29.60,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.69,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.65,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.10,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 
+2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,36.70,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,29.60,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,29.60,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.98,0.88,29.60,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.60,2.74,12.17,3.13,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.60,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.60,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.21,2.60,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.60,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.60,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.60,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.09,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.74,12.17,3.03,0.90 
+2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.74,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.60,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.67,0.88,29.70,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.15,2.66,2.76,12.17,3.03,0.90 +2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.15,2.66,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.15,2.66,2.76,12.17,3.07,0.90 
+2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.66,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.66,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.66,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.17,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.76,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 
+2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.07,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.23,2.62,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.62,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.62,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.62,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.74,12.18,3.13,0.90 +2.88,12.15,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.74,12.18,3.13,0.90 +2.88,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.74,12.18,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.74,12.18,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.74,12.18,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 
+2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,39.10,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.70,0.88,39.10,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.16,2.60,2.76,12.17,3.13,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.16,2.60,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.16,2.60,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.16,2.60,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.23,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.17,3.34,0.90 +2.86,12.14,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.17,3.34,0.90 +2.86,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.17,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.17,3.34,0.90 
+2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.76,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,39.10,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.99,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.34,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.22,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 
+2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.18,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 
+2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.16,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.12,2.60,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.74,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 
+2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.60,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.65,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.07,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.55,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.14,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.76,12.17,3.08,0.90 
+2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.50,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.08,0.90 +2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.05,0.90 +2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.05,0.90 +2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.18,2.62,2.74,12.17,3.05,0.90 +2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.05,0.90 +2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.05,0.90 
+2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.86,12.15,1.80,8.06,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.86,12.15,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.86,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 
+2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.05,0.90 +2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.12,0.90 +2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.12,0.90 +2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.20,2.64,2.74,12.17,3.12,0.90 +2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.64,2.74,12.17,3.12,0.90 +2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.64,2.74,12.17,3.12,0.90 +2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.64,2.74,12.17,3.12,0.90 +2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.74,12.17,3.12,0.90 +2.90,12.14,1.80,8.23,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.74,12.17,3.12,0.90 +2.90,12.14,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.74,12.17,3.12,0.90 +2.90,12.14,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.74,12.17,3.12,0.90 +2.90,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.74,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 
+2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.65,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.12,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.16,2.61,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.61,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.61,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.09,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.18,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 
+2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.15,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.04,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.04,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.20,2.57,2.76,12.17,3.04,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.57,2.76,12.17,3.04,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.57,2.76,12.17,3.04,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.57,2.76,12.17,3.04,0.90 
+2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.15,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.69,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 
+2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.04,0.90 +2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.22,2.58,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.58,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.58,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.58,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.18,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,2.76,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,2.76,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,2.76,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 
+3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.60,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.70,0.88,29.70,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.12,2.63,3.33,12.17,3.09,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.12,2.63,3.33,12.17,3.07,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.12,2.63,3.33,12.17,3.07,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.12,2.63,3.33,12.17,3.07,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.63,3.33,12.17,3.07,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.63,3.33,12.17,3.07,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,3.33,12.17,3.07,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,3.33,12.17,3.07,0.90 +3.12,12.14,1.80,8.54,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,3.33,12.17,3.07,0.90 +3.12,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,3.33,12.17,3.07,0.90 +3.12,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,3.33,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,3.33,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,3.33,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 
+2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.70,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.67,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.07,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.10,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.10,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.10,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.10,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.60,2.78,12.17,3.10,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.78,12.17,3.10,0.90 +2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.78,12.17,3.10,0.90 
+2.88,12.14,1.80,8.18,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.78,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.78,12.17,3.10,0.90 +2.88,12.14,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.78,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.78,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.78,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.60,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.70,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.61,0.88,29.70,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.19,2.57,2.76,12.17,3.10,0.90 
+2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.19,2.57,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.19,2.57,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.19,2.57,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.57,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.57,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.76,12.17,3.09,0.90 +2.86,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.76,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 
+2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.70,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.71,0.88,29.60,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.59,2.74,12.17,3.09,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.12,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.17,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 
+2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.67,0.88,29.70,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.59,3.49,12.18,3.10,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.59,3.49,12.18,3.07,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.59,3.49,12.18,3.07,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.59,3.49,12.18,3.07,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.59,3.49,12.18,3.07,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.59,3.49,12.18,3.07,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.59,3.49,12.18,3.07,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,3.49,12.18,3.07,0.90 +2.88,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,3.49,12.18,3.07,0.90 +2.88,12.15,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,3.49,12.18,3.07,0.90 +2.88,12.15,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,3.49,12.18,3.07,0.90 
+2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,3.49,12.18,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,3.49,12.18,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,3.49,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.70,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.65,0.88,29.60,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.21,2.65,2.72,12.17,3.07,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.21,2.65,2.72,12.17,3.03,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.21,2.65,2.72,12.17,3.03,0.90 
+2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.65,2.72,12.17,3.03,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.65,2.72,12.17,3.03,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.65,2.72,12.17,3.03,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.72,12.17,3.03,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.72,12.17,3.03,0.90 +2.88,12.14,1.80,8.15,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.72,12.17,3.03,0.90 +2.88,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.72,12.17,3.03,0.90 +2.88,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.72,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.72,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.72,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.60,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 
+2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.50,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.60,0.88,29.50,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.16,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.66,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.10,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.74,12.17,3.03,0.90 +2.86,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.74,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.74,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.74,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 
+2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.50,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.70,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.58,0.88,29.70,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.25,2.61,2.76,12.17,3.03,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.25,2.61,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.25,2.61,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.25,2.61,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.61,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.61,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.17,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.88,12.14,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 
+2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,36.80,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.66,0.88,36.80,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.22,2.64,2.76,12.17,3.08,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.22,2.64,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.22,2.64,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.64,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.64,2.76,12.17,3.12,0.90 
+2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.64,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.18,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.76,12.17,3.12,0.90 +2.86,12.15,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.76,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.76,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.76,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,36.80,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,29.60,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.92,0.88,29.60,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 
+2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.10,2.57,2.72,12.17,3.12,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.10,2.57,2.72,12.17,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.10,2.57,2.72,12.17,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.10,2.57,2.72,12.17,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.57,2.72,12.17,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.57,2.72,12.17,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.72,12.17,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.72,12.17,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.72,12.17,3.14,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.72,12.17,3.14,0.90 +2.88,12.14,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.72,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.72,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.72,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.72,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 
+2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.60,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.70,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.70,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.73,0.88,29.70,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.08,2.64,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.08,2.64,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.08,2.64,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.08,2.64,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.64,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.64,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.64,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.18,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.74,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.74,12.17,3.12,0.90 
+2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.75,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.12,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.19,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.19,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.07,2.62,2.76,12.17,3.19,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 
+2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.88,12.15,1.80,8.14,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.88,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.88,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 
+2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.19,0.90 +2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.13,2.62,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.62,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.62,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.06,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.76,12.17,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.76,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 
+2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,4.02,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.50,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.57,2.74,12.18,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.17,0.90 +2.86,12.15,1.80,8.12,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.17,0.90 +2.86,12.15,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.17,0.90 +2.86,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 
+2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.60,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.60,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.14,2.60,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.60,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.60,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.60,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.17,3.14,0.90 
+2.88,12.14,1.80,8.20,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.17,3.14,0.90 +2.88,12.14,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.17,3.14,0.90 +2.88,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.17,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.17,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.17,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.60,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.70,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.74,0.88,29.70,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.11,2.55,2.74,12.18,3.14,0.90 
+2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.11,2.55,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.11,2.55,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.55,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.55,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.55,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.18,3.08,0.90 +2.90,12.15,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.18,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 
+2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.70,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.60,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.60,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.69,0.88,29.60,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.63,2.74,12.17,3.08,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.63,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.63,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.23,2.63,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.63,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.63,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.20,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 
+3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.30,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.10,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.09,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.09,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.22,2.67,2.74,12.17,3.09,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.67,2.74,12.17,3.09,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.67,2.74,12.17,3.09,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +3.04,12.14,1.80,8.37,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +3.04,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 
+3.04,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.29,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.30,3.67,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.04,0.90 
+2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.24,2.64,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.64,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.64,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.64,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.60,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 
+2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.65,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.04,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.14,2.61,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.61,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.74,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 
+2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.70,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.13,2.69,2.76,12.18,3.10,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.13,2.69,2.76,12.18,3.02,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.13,2.69,2.76,12.18,3.02,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.13,2.69,2.76,12.18,3.02,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.69,2.76,12.18,3.02,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.69,2.76,12.18,3.02,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.69,2.76,12.18,3.02,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.02,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.02,0.90 +2.88,12.14,1.80,8.25,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.02,0.90 +2.88,12.14,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.02,0.90 
+3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.18,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,2.76,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.64,3.51,12.17,3.02,0.90 +3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.64,3.51,12.17,3.16,0.90 +3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.64,3.51,12.17,3.16,0.90 +3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.17,2.64,3.51,12.17,3.16,0.90 +3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.64,3.51,12.17,3.16,0.90 
+3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.64,3.51,12.17,3.16,0.90 +3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.64,3.51,12.17,3.16,0.90 +3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,3.51,12.17,3.16,0.90 +3.14,12.15,1.80,8.15,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,3.51,12.17,3.16,0.90 +3.14,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,3.51,12.17,3.16,0.90 +3.14,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,3.51,12.17,3.16,0.90 +3.14,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,3.51,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,3.51,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,3.51,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.70,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.60,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.60,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 
+2.88,12.15,1.80,8.25,3.30,3.70,0.88,29.60,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.16,2.63,2.76,12.17,3.16,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.16,2.63,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.16,2.63,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.16,2.63,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.63,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.63,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.63,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.25,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.76,12.17,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.76,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 
+2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.64,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.08,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.11,2.64,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.64,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.64,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.64,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.15,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.10,0.90 +2.88,12.15,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.18,3.10,0.90 
+2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.58,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.10,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.18,2.60,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.17,3.09,0.90 
+2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.60,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.23,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.74,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,29.60,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,36.80,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.74,0.88,36.80,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 
+2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.20,2.62,2.76,12.17,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.20,2.62,2.76,12.17,3.02,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.20,2.62,2.76,12.17,3.02,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.20,2.62,2.76,12.17,3.02,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.62,2.76,12.17,3.02,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.62,2.76,12.17,3.02,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.76,12.17,3.02,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.76,12.17,3.02,0.90 +2.88,12.14,1.80,8.21,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.76,12.17,3.02,0.90 +2.88,12.14,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.76,12.17,3.02,0.90 +2.88,12.14,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.76,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.76,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.76,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.76,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 
+2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,36.80,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.67,0.88,29.70,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.63,2.72,12.17,3.02,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.63,2.72,12.17,3.11,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.63,2.72,12.17,3.11,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.25,2.63,2.72,12.17,3.11,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.63,2.72,12.17,3.11,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.72,12.17,3.11,0.90 +2.88,12.15,1.80,8.15,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.72,12.17,3.11,0.90 +2.88,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.72,12.17,3.11,0.90 +2.88,12.15,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.72,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.72,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.72,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.72,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 
+2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.70,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.60,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.60,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.71,0.88,29.60,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.68,2.74,12.17,3.11,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.68,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.68,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.18,2.68,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.68,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.68,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.68,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.06,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.74,12.17,3.17,0.90 +2.88,12.14,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.74,12.17,3.17,0.90 
+2.88,12.14,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.74,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.66,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.17,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.13,0.90 
+2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.63,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.63,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.63,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.18,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.88,12.15,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.17,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 
+2.88,12.14,1.80,8.07,3.30,3.65,0.88,29.70,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,39.10,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.65,0.88,39.10,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.25,2.64,2.76,12.18,3.13,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.25,2.64,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.25,2.64,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.25,2.64,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.64,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.64,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.07,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.76,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 
+2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,39.10,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.98,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.38,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.60,2.74,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.60,2.74,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.60,2.74,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.74,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.74,12.18,3.09,0.90 +2.88,12.14,1.80,8.21,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.74,12.18,3.09,0.90 +2.88,12.14,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.74,12.18,3.09,0.90 +2.88,12.14,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.74,12.18,3.09,0.90 
+2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.74,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.70,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.66,0.88,29.60,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.61,2.78,12.18,3.09,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.61,2.78,12.18,3.16,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.61,2.78,12.18,3.16,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.16,2.61,2.78,12.18,3.16,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.61,2.78,12.18,3.16,0.90 
+2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.61,2.78,12.18,3.16,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.61,2.78,12.18,3.16,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.78,12.18,3.16,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.78,12.18,3.16,0.90 +2.88,12.15,1.80,8.07,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.78,12.18,3.16,0.90 +2.88,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.78,12.18,3.16,0.90 +2.88,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.78,12.18,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.78,12.18,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.78,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.60,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 
+2.86,12.15,1.80,8.17,3.30,3.73,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.16,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.12,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.12,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.12,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.12,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.60,2.74,12.17,3.12,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.17,3.12,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.17,3.12,0.90 +2.86,12.15,1.80,8.17,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.17,3.12,0.90 +2.86,12.15,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.17,3.12,0.90 +2.86,12.15,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.74,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 
+2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.70,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.69,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.12,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.16,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.17,2.63,2.76,12.17,3.16,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.16,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.16,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.63,2.76,12.17,3.16,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +2.86,12.14,1.80,8.09,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +2.86,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +2.86,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +2.86,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 
+3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.60,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.77,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.66,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.66,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.66,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.17,3.16,0.90 +3.02,12.14,1.80,8.35,3.30,3.66,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.17,3.18,0.90 +3.02,12.14,1.80,8.35,3.30,3.66,0.88,29.70,1.12,1.12,7.21,2.62,2.76,12.17,3.18,0.90 +3.02,12.14,1.80,8.35,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.62,2.76,12.17,3.18,0.90 +3.02,12.14,1.80,8.35,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.62,2.76,12.17,3.18,0.90 
+3.02,12.14,1.80,8.35,3.30,3.66,0.88,29.70,1.12,1.12,7.14,2.62,2.76,12.17,3.18,0.90 From 5d343b200fc418481f19189664fbf64f03f378c2 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sun, 17 Oct 2021 14:34:04 +0200 Subject: [PATCH 69/76] minor change --- examples/helper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/helper.cpp b/examples/helper.cpp index e1ceed3..a0ec83e 100644 --- a/examples/helper.cpp +++ b/examples/helper.cpp @@ -7,7 +7,7 @@ using namespace std; -unsigned bit_reversed(unsigned x, unsigned bits) { +unsigned bit_reversed(unsigned x, const unsigned bits) { unsigned y = 0; for (unsigned i = 0; i < bits; i++) { y <<= 1; From 08e97e10d4649f96c9f8d5a2e20a8d334ff17df4 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sun, 17 Oct 2021 14:36:54 +0200 Subject: [PATCH 70/76] updated changelog --- CHANGELOG | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 7e4b868..cf1b300 100755 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,21 +1,15 @@ # Changelog + All notable changes to this project will be documented in this file. 
## Unreleased + - configurable CL platform and device -## [2.0.0] - [] +## [1.0.0] - [18.10.2021] +- 1d, 2d, 3d FFT with variants on the location of the transposition - Batched 3D FFT to schedule multiple transformations by overlapping data transfers and FFT computations - Using OpenCL Shared Virtual Memory (SVM) for data transfers between FPGA and host - -## [1.0.0] - [16.06.2020] - -### Added -- 3d FFT in 2 varients, in bram and ddr transpose -- 2d FFT in 2 varients, in bram and ddr transpose -- 1d FFT -- CI/CD -- gtests -- CMake build -- License \ No newline at end of file +- Unit tests +- CMake build system From 6152fd072fc453ec707f00ef22d713722d85af42 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sun, 17 Oct 2021 14:51:06 +0200 Subject: [PATCH 71/76] ci: test fft1d --- .gitlab-ci.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1ac227b..4a7ae35 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,6 +4,7 @@ variables: stages: - build - test + - test_fft1d build-all: stage: build @@ -31,3 +32,9 @@ test-all: - CL_CONFIG_CPU_EMULATE_DEVICES=1 ./test_fftfpga dependencies: - build-all + +test-fft1d: + stage: test_fft1d + script: + - cd build/bin/ + - ./fft -n 64 -d 1 --emulate -p p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d.aocx From 9b11859cbf0d29133671d8eb30e6751a2d3d5044 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sun, 17 Oct 2021 14:59:57 +0200 Subject: [PATCH 72/76] updated ci with 2d, 3d variants --- .gitlab-ci.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4a7ae35..26858a2 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -5,6 +5,10 @@ stages: - build - test - test_fft1d + - test_fft2d_ddr + - test_fft2d_bram + - test_fft3d_ddr + - test_fft3d_bram build-all: stage: build @@ -38,3 +42,27 @@ test-fft1d: script: - cd build/bin/ - ./fft -n 64 -d 1 --emulate -p p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d.aocx + 
+test-fft2d_ddr: + stage: test_fft2d_ddr + script: + - cd build/bin/ + - ./fft -n 64 -d 2 --emulate -p p520_hpc_sg280l/emulation/fft2d_ddr_64_nointer/fft2d_ddr.aocx + +test-fft2d_bram: + stage: test_fft2d_bram + script: + - cd build/bin/ + - ./fft -n 64 -d 2 --emulate --use_bram -p p520_hpc_sg280l/emulation/fft2d_bram_64_nointer/fft2d_bram.aocx + +test-fft3d_ddr: + stage: test_fft3d_ddr + script: + - cd build/bin/ + - ./fft -n 64 -d 3 --emulate -p p520_hpc_sg280l/emulation/fft3d_ddr_64_nointer/fft3d_ddr.aocx + +test-fft3d_bram: + stage: test_fft3d_bram + script: + - cd build/bin/ + - ./fft -n 64 -d 3 --emulate --use_bram -p p520_hpc_sg280l/emulation/fft3d_bram_64_nointer/fft3d_bram.aocx \ No newline at end of file From 8e5c316890634bfcb722088c67346dd653280a41 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Sun, 17 Oct 2021 15:07:00 +0200 Subject: [PATCH 73/76] removed fft3d bram due to lack of cpu resources --- .gitlab-ci.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 26858a2..6237d59 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,7 +8,6 @@ stages: - test_fft2d_ddr - test_fft2d_bram - test_fft3d_ddr - - test_fft3d_bram build-all: stage: build @@ -59,10 +58,4 @@ test-fft3d_ddr: stage: test_fft3d_ddr script: - cd build/bin/ - - ./fft -n 64 -d 3 --emulate -p p520_hpc_sg280l/emulation/fft3d_ddr_64_nointer/fft3d_ddr.aocx - -test-fft3d_bram: - stage: test_fft3d_bram - script: - - cd build/bin/ - - ./fft -n 64 -d 3 --emulate --use_bram -p p520_hpc_sg280l/emulation/fft3d_bram_64_nointer/fft3d_bram.aocx \ No newline at end of file + - ./fft -n 64 -d 3 --emulate -p p520_hpc_sg280l/emulation/fft3d_ddr_64_nointer/fft3d_ddr.aocx \ No newline at end of file From 58b72790139cbfab1c8656224bbd6d35568589aa Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 18 Oct 2021 13:50:53 +0200 Subject: [PATCH 74/76] cmake build type selection --- CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 32ae2ea..4b81d7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,15 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") find_package(FFTW REQUIRED) message("-- FFTW found") +# Set a default build type if none was specified +if(NOT CMAKE_BUILD_TYPE) + message(STATUS "Setting build type to 'Debug' as none was specified.") + set(CMAKE_BUILD_TYPE Debug CACHE STRING "Choose the type of build." FORCE) + # Set the possible values of build type for cmake-gui + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" + "RelWithDebInfo") +endif() + # sub directories add_subdirectory(api) add_subdirectory(kernels) From b8a7795acb6e35df5de5ac80c9620ad57c4f04fe Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 18 Oct 2021 13:51:52 +0200 Subject: [PATCH 75/76] removed caching cmake variables for sdk and bsp --- kernels/CMakeLists.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt index 4d6ed0c..154d487 100644 --- a/kernels/CMakeLists.txt +++ b/kernels/CMakeLists.txt @@ -12,29 +12,34 @@ else() message(ERROR, "No Target board found") endif() -set(SDK_VERSION $ENV{QUARTUS_VERSION} CACHE STRING "SDK Version") +# SDK and BSP Versions are used in setting target paths for bitstreams +set(SDK_VERSION $ENV{QUARTUS_VERSION}) if(SDK_VERSION) message("-- SDK Version: ${SDK_VERSION}") else() message(ERROR, "No SDK Version Found") endif() -set(BSP_VERSION $ENV{QUARTUS_VERSION_BSP} CACHE STRING "BSP Version") +set(BSP_VERSION $ENV{QUARTUS_VERSION_BSP}) if(BSP_VERSION) message("-- BSP Version: ${BSP_VERSION}") else() message(ERROR, "No BSP Found") endif() +# Default number of points used per cycle in an FFT computation +# Currently, supports 8 points per cycle set(LOG_POINTS 3 CACHE STRING "Log of per sample data points") math(EXPR POINTS "1 << ${LOG_POINTS}") +# Number of points in each dimension of the FFT being computed 
set(LOG_FFT_SIZE 6 CACHE STRING "Log of points of FFT") set_property(CACHE LOG_FFT_SIZE PROPERTY STRINGS 4 5 6 7 8 9) math(EXPR FFT_SIZE "1 << ${LOG_FFT_SIZE}") message("-- FFT size is ${FFT_SIZE}") math(EXPR DEPTH "1 << (${LOG_FFT_SIZE} + ${LOG_FFT_SIZE} - ${LOG_POINTS})") +# Toggle to append the right parameters to AOC Flags set(BURST_INTERLEAVING CACHE BOOL "Enable burst interleaving") if(BURST_INTERLEAVING) set(INTERLEAVING "") From 8cebccdbf4c111ae9655c5cf307ba71a9e786351 Mon Sep 17 00:00:00 2001 From: Arjun Ramaswami Date: Mon, 18 Oct 2021 13:52:11 +0200 Subject: [PATCH 76/76] modified readme and updated userguide --- README.md | 28 +++++++--- docs/userguide.md | 133 +++++++++++++++++++++++++++------------------- 2 files changed, 98 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index b54455e..a192b94 100644 --- a/README.md +++ b/README.md @@ -38,22 +38,22 @@ Firstly, *dependencies* for building the system Once you have this covered, execute the following: ```bash -mkdir build && cd build # Directory to store build outputs +mkdir build && cd build cmake .. make ``` You have built the *API* i.e., the OpenCL host code that invokes different transformations correctly are packed into a static library. This must be linked to an application. -A sample application that helps invoke the APIs. +You have also compiled a sample application that helps invoke these APIs. -*Strictly said*, you have built the following: +*Strictly said*, you have done the following: - `fftfpga` static library, linked such as `-lfftfpga` - `fftfpga/fftfpga.h` header file - `fft` - a sample application which links and includes the above two. -Now, for the real deal, synthesizing the OpenCL FFT kernels. These can be synthesized to run on software emulation or on hardware as bitstreams. +Now onto synthesizing the OpenCL FFT kernels. These can be synthesized to run on software emulation or on hardware as bitstreams. 
- Emulation @@ -69,7 +69,7 @@ make _syn make fft3d_ddr_syn ``` -Putting them all together, set the path to the synthesized bitstream along with other correct configurations as command line parameters to the sample application generated, to execute the transformation. +Putting them all together, in order to execute the required FFT, set the path to the synthesized bitstream along with other correct configurations as command line parameters to the sample application generated. ```bash ./fft --num=64 --dim=3 --path=fft3d_ddr_128.aocx @@ -77,7 +77,19 @@ Putting them all together, set the path to the synthesized bitstream along with *Tip*: for emulation, use the `--emulate` command line parameter. -For explanations regarding the command line options and the OpenCL kernels, check out x and y in the advanced guide. +### List of Kernels + +| | Kernel Name | Description | +| :-- | :---------- | :---------------------------------- | +| 1D | fft1d | OpenCL design provided by Intel | +| 2D | fft2d\_ddr | DDR memory is used for 2D Transpose | +| | fft2d\_bram | BRAM is used for 2D Transpose | +| 3D | fft3d\_ddr | DDR memory is used for 3D Transpose | +| | fft3d\_bram | BRAM is used for 3D Transpose | + +These kernels can be synthesized by appending the suffix `_emulate` or `_syn` to the kernel name, such as `fft1d_emulate`. + +Please check out the [User Guide](docs/userguide.md) for more information such as configuration options etc. 
## Publications @@ -91,8 +103,8 @@ FFTFPGA has been cited in the following publications: ## Related Repositories -- ConvFPGA - an OpenCL based library for FFT-based convolution on FPGAs -- FFTFPGA-eval +- [ConvFPGA](https://github.com/pc2/ConvFPGA) - an OpenCL based library for FFT-based convolution on FPGAs +- [FFTFPGA-eval](https://git.uni-paderborn.de/arjunr/fftfpga-eval) - archives reports and measurements from FFTFPGA and ConvFPGA ## Contact diff --git a/docs/userguide.md b/docs/userguide.md index d889a38..de7c584 100644 --- a/docs/userguide.md +++ b/docs/userguide.md @@ -9,63 +9,72 @@ - `scripts`: convenience slurm scripts - `docs` : describes models regarding performance and resource utilization -## Build System +## CMake Build Setup ### External Libraries -These additional libraries that are automatically fetched during system configuration: +These additional libraries are automatically fetched during system configuration: - [cxxopts](https://github.com/jarro2783/cxxopts) for command line argument parsing - [hlslib](https://github.com/definelicht/hlslib) for CMake Intel FPGA OpenCL find packages - [findFFTW](https://github.com/egpbos/findFFTW.git) for CMake FFTW find package - [gtest](https://github.com/google/googletest.git) for unit tests -### List of Kernels +### Configuration Options -| | Kernel Name | Description | -| :-- | :---------- | :---------------------------------- | -| 1D | fft1d | OpenCL design provided by Intel | -| 2D | fft2d\_ddr | DDR memory is used for 2D Transpose | -| | fft2d\_bram | BRAM is used for 2D Transpose | -| 3D | fft3d\_ddr | DDR memory is used for 3D Transpose | -| | fft3d\_bram | BRAM is used for 3D Transpose | +The following compile options can be set when creating a CMake build directory either using the `-D` parameter or by using the cmake-gui such as: -These kernels can be synthesized by appending `_emulate` or `_syn` to its suffix such as `fft1d_emulate`. 
+`cmake -DCMAKE_BUILD_TYPE=Release ..` + +`ccmake ..` + +| ** Name ** | ** Description ** | ** Default Values ** | ** Alternate Values ** | +| :-------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------- | :---------------------------- | +| `AOC\_FLAGS*` * | Intel offline compiler flags used for kernel compilation | `-g -v -no-interleaving=default` | | +| `EMU\_FLAGS` | Compiler flags used for emulation, with fast emulation as default | `-march=emulator` | | +| `FPGA\_BOARD\_NAME` | Name of the target FPGA board | `p520\_hpc\_sg280l` | `pac\_s10\_usm` | +| `LOG\_FFT\_SIZE` | Currently supported log2 number of points along each FFT dimension | 6 | 5, 7, 8, 9  | +| `BURST\_INTERLEAVING*` | Toggle to enable burst interleaved global memory accesses
Sets the `-no-interleaving=` option in the `AOC\_FLAGS` parameter | NO | YES | +| `DDR\_BUFFER\_LOCATION` | Name of the global memory interface found in the `board\_spec.xml`
`DDR` : `p520\_hpc\_sg280l`, `device` : `pac\_s10\_usm` board | `DDR` | `device` | +| `SVM\_BUFFER\_LOCATION` | Name of the SVM global memory interface found in the `board\_spec.xml`
"" : `p520\_hpc\_sg280l`, `host`: `pac\_s10\_usm` | | `host` | +| `CMAKE\_BUILD\_TYPE` | Specify the build type | `Debug` | `Release`, `RelWithDebInfo` | ### Additional Kernel Builds -Generation of aocl reports +Generation of Intel OpenCL Offline Compiler reports ```bash make _report make fft1d_report ``` -## Compile Definitions - -Using ccmake or by setting it using -D - -- `LOG_SIZE`: set the log of the length of the matrix. Example: `-DLOG_SIZE=6`. - -## Enabling Shared Virtual Memory Extensions (SVM) - -Currently tested for pacd5005 board. The board specification required setting the following attributes to global memory accesses, hence it has been set automatically. Otherwise, it can be set under the variable names. +### Using the GNU Debugger (gdb) with the Debug builds +CMake debug builds let you step through code using gdb. +`gdb --args ./fft -n 64 -d 2 -p ` ## Runtime Input Parameters ```bash - -h, --help show this help message and exit - -Basic Options - -n, --n= FFT Points - -s, --sp Single Precision - -i, --iter= Iterations - -b, --back Backward FFT - -v, --svm Use SVM - -m, --bram Use BRAM - -p, --path= Path to bitstreamm +Offloading FFT on FPGA +Usage: + ./fft<..> [OPTION...] 
+ + -n, --num arg Number of sample points in a dimension (default: 64) + -d, --dim arg Number of dimensions (default: 3) + -b, --back Toggle Backward FFT + -i, --iter arg Number of iterations (default: 1) + -p, --path arg Path to FPGA bitstream + -y, --noverify Toggle to not verify with FFTW + -c, --batch arg Number of batches of FFT calculations in FPGA (default: 1) + -t, --burst Toggle to use burst interleaved global memory accesses in + FPGA + -m, --use_bram Toggle to use BRAM instead of DDR for 3D Transpose + -s, --use_usm Toggle to use Unified Shared Memory features for data + transfers between host and device + -e, --emulate Toggle to enable emulation + -h, --help Print usage ``` ## Output Interpretation @@ -74,36 +83,50 @@ The examples measure and output relevant performance metrics that are shown belo ```bash ------------------------------------------ -FFT Configuration: +FFT CONFIGURATION: -------------------------------------------- -Type = Complex to Complex -Points = 64 -Precision = Single -Direction = Forward -Placement = In Place -Iterations = 1 +Type : Complex to Complex +Points : 64 +Direction : Forward +Placement : In Place +Batch : 1 +Iterations : 1 +Transpose3D : DDR +Burst Interleaving : No +Emulation : Yes +USM Feature : No -------------------------------------------- - - Initializing FPGA ... - Getting program binary from path emu_64_fft3d_bram/fft3d_bram.aocx ... - Building program ... - FFT kernel initialization is complete. - Cleaning up FPGA resources ... - +-- Initializing FPGA ... 
+-- 1 platforms found + 0: intel(r) fpga emulation platform for opencl(tm) +-- 1 devices found + Choosing first device by default +-- Getting program binary from path: p520_hpc_sg280l/emulation/fft1d_64_nointer/fft1d.aocx +-- Building the program +0: Calculating FFT - +-- Launching 1D FFT of 1 batches +Launching FFT transform for 1 batch +-- Copying data from host to device +-- Executing kernels +-- Transfering results back to host +-- Cleaning up FPGA resources ... ------------------------------------------ -Measurements +Measurements -------------------------------------------- -Points = 64 -Precision = Single -Direction = Forward -PCIe Write = 0.03ms -Kernel Execution = 0.48ms -PCIe Read = 0.02ms -Throughput = 0.00GFLOPS/s | 0.00 GB/s +PCIe Write = 0.0000ms +Kernel Execution = 0.0182ms +Kernel Exec/Batch = 0.0182ms +PCIe Read = 0.0000ms +Total = 0.0182ms +Throughput = 0.0982GFLOPS/s | 26.8213 GB/s ``` -- `PCIe Write` and `PCIe Read` the time taken in milliseconds for transfer of data from host to global memory through PCIe bus. +- `PCIe Write`: time taken in milliseconds to transfer data from host memory of the CPU to the global memory of the FPGA. + +- `PCIe Read` : the time taken in milliseconds to transfer data from global memory of the FPGA to the host memory of the CPU. -- `Kernel Execution` represents the time taken in milliseconds for the execution of the OpenCL implementation that includes the global memory accesses. +- `Kernel Execution` : the time taken in milliseconds for the execution of the required kernels, which includes the global memory accesses. +- `Total` : `PCIe Write` + `Kernel Execution` + `PCIe Read` +- `Throughput` : $$ \frac{dim * 5 * N^{dim} * log_2 N}{runtime}$$ \ No newline at end of file