@@ -274,15 +274,18 @@ struct StreamingQRD {
274274 // kFanoutReduction times to reduce fanout
275275 bool j_eq_i[kBanksForFanout ], i_gt_0[kBanksForFanout ],
276276 i_ge_0_j_ge_i[kBanksForFanout ], j_eq_i_plus_1[kBanksForFanout ],
277- i_lt_0[kBanksForFanout ];
277+ i_lt_0[kBanksForFanout ], j_ge_0[ kBanksForFanout ] ;
278278
279279 fpga_tools::UnrolledLoop<kBanksForFanout >([&](auto k) {
280280 i_gt_0[k] = sycl::ext::intel::fpga_reg (i > 0 );
281281 i_lt_0[k] = sycl::ext::intel::fpga_reg (i < 0 );
282282 j_eq_i[k] = sycl::ext::intel::fpga_reg (j == i);
283+ j_ge_0[k] = sycl::ext::intel::fpga_reg (j >= 0 );
283284 i_ge_0_j_ge_i[k] = sycl::ext::intel::fpga_reg (i >= 0 && j >= i);
284285 j_eq_i_plus_1[k] = sycl::ext::intel::fpga_reg (j == i + 1 );
285- s_or_ir_j[k] = sycl::ext::intel::fpga_reg (s_or_ir[j]);
286+ if (j >= 0 ) {
287+ s_or_ir_j[k] = sycl::ext::intel::fpga_reg (s_or_ir[j]);
288+ }
286289 });
287290
288291 // Preload col and a_i with the correct data for the current iteration
@@ -298,14 +301,14 @@ struct StreamingQRD {
298301 // If no i iteration elapsed, we must read the column of
299302 // matrix A directly from the a_load; col then contains a_j
300303
301- if (i_gt_0[fanout_bank_idx]) {
304+ if (i_gt_0[fanout_bank_idx] && j_ge_0[fanout_bank_idx] ) {
302305 col[k] = a_compute[j].template get <k>();
303306 }
304307 // Using an else statement makes the compiler throw an
305308 // inexplicable warning when using non complex types:
306309 // "Compiler Warning: Memory instruction with unresolved
307310 // pointer may lead to bad QoR."
308- if (!i_gt_0[fanout_bank_idx]) {
311+ if (!i_gt_0[fanout_bank_idx] && j_ge_0[fanout_bank_idx] ) {
309312 col[k] = a_load[j].template get <k>();
310313 }
311314
@@ -347,7 +350,7 @@ struct StreamingQRD {
347350 // are either going to be:
348351 // -> overwritten for the matrix Q (q_result)
349352 // -> unused for the a_compute
350- if (i_ge_0_j_ge_i[fanout_bank_idx]) {
353+ if (i_ge_0_j_ge_i[fanout_bank_idx] && j_ge_0[fanout_bank_idx] ) {
351354 q_result[j].template get <k>() = col1[k];
352355 a_compute[j].template get <k>() = col1[k];
353356 }
@@ -484,4 +487,4 @@ struct StreamingQRD {
484487
485488} // namespace fpga_linalg
486489
487- #endif /* __STREAMING_QRD_HPP__ */
490+ #endif /* __STREAMING_QRD_HPP__ */
0 commit comments