6 changes: 3 additions & 3 deletions llvm/test/DebugInfo/dwarfdump-debug-frame-simple.test
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@
; FRAMES-NEXT: DW_CFA_nop:

; FRAMES: 00000014 00000010 00000000 FDE cie=00000000 pc=00000000...00000022
; FRAMES: DW_CFA_advance_loc: 3
; FRAMES: DW_CFA_advance_loc: 3 to 0x3
; FRAMES-NEXT: DW_CFA_def_cfa_offset: +12
; FRAMES-NEXT: DW_CFA_nop:

; FRAMES: 00000028 00000014 00000000 FDE cie=00000000 pc=00000030...00000080
; FRAMES: DW_CFA_advance_loc: 1
; FRAMES: DW_CFA_advance_loc: 1 to 0x31
; FRAMES-NEXT: DW_CFA_def_cfa_offset: +8
; FRAMES-NEXT: DW_CFA_offset: {{reg5|EBP}} -8
; FRAMES-NEXT: DW_CFA_advance_loc: 2
; FRAMES-NEXT: DW_CFA_advance_loc: 2 to 0x33
; FRAMES-NEXT: DW_CFA_def_cfa_register: {{reg5|EBP}}

; FRAMES-NOT: CIE
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Instrumentation/AddressSanitizer/aarch64be.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
; RUN: opt < %s -passes=asan -S -mtriple=aarch64_be-linux-gnu | FileCheck --check-prefix=CHECK-AARCH64BE %s
; REQUIRES: aarch64-registered-target

define i32 @read_4_bytes(i32* %a) sanitize_address {
define i32 @read_4_bytes(ptr %a) sanitize_address {
entry:
%tmp1 = load i32, i32* %a, align 4
%tmp1 = load i32, ptr %a, align 4
ret i32 %tmp1
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ target datalayout = "P1"

define i1 @b(i64 %c) addrspace(1) {
%cast = inttoptr i64 %c to ptr addrspace(42)
%cmp = icmp ugt ptr addrspace(42) %cast, getelementptr inbounds ([1 x i32], ptr addrspace(42) @a, i64 0, i64 0)
%cmp = icmp ugt ptr addrspace(42) %cast, @a
ret i1 %cmp
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@

target triple = "x86_64-unknown-linux-gnu"

declare void @llvm.instrprof.increment.step(i8*, i64, i32, i32, i64)
declare void @llvm.instrprof.increment.step(ptr, i64, i32, i32, i64)

declare void @llvm.instrprof.value.profile(i8*, i64, i64, i32, i32)
declare void @llvm.instrprof.value.profile(ptr, i64, i64, i32, i32)

; CHECK: @__profd_foo = private global
@__profn_foo = private constant [3 x i8] c"foo"

define i32 @foo(i32 ()* ) {
%2 = ptrtoint i32 ()* %0 to i64
call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i64 %2, i32 0, i32 0)
call void @llvm.instrprof.increment.step(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0, i64 0)
define i32 @foo(ptr ) {
%2 = ptrtoint ptr %0 to i64
call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 0, i64 %2, i32 0, i32 0)
call void @llvm.instrprof.increment.step(ptr @__profn_foo, i64 0, i32 1, i32 0, i64 0)
%3 = tail call i32 %0()
ret i32 %3
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK: @__profc_foo = private global [9 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF", section "__llvm_prf_cnts", comdat, align 8

define void @_Z3foov() {
call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 0)
call void @llvm.instrprof.timestamp(ptr @__profn_foo, i64 12345678, i32 9, i32 0)
; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo)
call void @llvm.instrprof.cover(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 8)
call void @llvm.instrprof.cover(ptr @__profn_foo, i64 12345678, i32 9, i32 8)
ret void
}

declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32)
declare void @llvm.instrprof.cover(i8*, i64, i32, i32)
declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32)
declare void @llvm.instrprof.cover(ptr, i64, i32, i32)
8 changes: 4 additions & 4 deletions llvm/test/Instrumentation/InstrProfiling/timestamp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK: @__profc_foo = private global [2 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8

define void @_Z3foov() {
call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 0)
call void @llvm.instrprof.timestamp(ptr @__profn_foo, i64 12345678, i32 2, i32 0)
; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo)
call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 1)
call void @llvm.instrprof.increment(ptr @__profn_foo, i64 12345678, i32 2, i32 1)
ret void
}

declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32)
declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32)
declare void @llvm.instrprof.increment(ptr, i64, i32, i32)
10 changes: 5 additions & 5 deletions llvm/test/Object/Inputs/small.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ target triple = "i386-pc-windows"

define i32 @main() nounwind {
entry:
%call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)) nounwind
tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind
%call = tail call i32 @puts(ptr @.str) nounwind
tail call void @SomeOtherFunction() nounwind
ret i32 0
}

declare i32 @puts(i8* nocapture) nounwind
declare i32 @puts(ptr nocapture) nounwind

declare void @SomeOtherFunction(...)

@var = global i32 0
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @var to i8*)], section "llvm.metadata"
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* null, i8* null }]
@llvm.used = appending global [1 x ptr] [ptr @var], section "llvm.metadata"
@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr null, ptr null }]
10 changes: 5 additions & 5 deletions llvm/test/Object/Inputs/trivial.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@

define i32 @main() nounwind {
entry:
%call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)) nounwind
tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind
%call = tail call i32 @puts(ptr @.str) nounwind
tail call void @SomeOtherFunction() nounwind
ret i32 0
}

declare i32 @puts(i8* nocapture) nounwind
declare i32 @puts(ptr nocapture) nounwind

declare void @SomeOtherFunction(...)

@var = global i32 0
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @var to i8*)], section "llvm.metadata"
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* null, i8* null }]
@llvm.used = appending global [1 x ptr] [ptr @var], section "llvm.metadata"
@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr null, ptr null }]
4 changes: 2 additions & 2 deletions llvm/test/Object/X86/irsymtab-bad-alias.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ target triple = "x86_64-unknown-linux-gnu"
@g1 = global i32 1
@g2 = global i32 2

@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32),
i32 ptrtoint (i32* @g2 to i32)) to i32*)
@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (ptr @g1 to i32),
i32 ptrtoint (ptr @g2 to i32)) to ptr)
10 changes: 5 additions & 5 deletions llvm/test/Object/X86/nm-ir.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ module asm ".long undef_asm_sym"
@g3 = common global i32 0
@g4 = private global i32 42

@a1 = alias i32, i32* @g1
@a2 = internal alias i32, i32* @g1
@a1 = alias i32, ptr @g1
@a2 = internal alias i32, ptr @g1

define void ()* @f1() {
define ptr @f1() {
call void @f5()
ret void ()* null
ret ptr null
}

@ifunc_f1 = ifunc void (), void ()* ()* @f1
@ifunc_f1 = ifunc void (), ptr @f1

define internal void @f2() {
ret void
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Object/dllimport-globalref.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ target triple = "x86_64-pc-windows-msvc"
; CHECK: U f

declare dllimport void @f()
@fp = constant void ()* @f
@fp = constant ptr @f
2 changes: 1 addition & 1 deletion llvm/test/Object/dllimport.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ declare dllimport void @f()

define void @g() {
call void @f()
store i32 42, i32* @v
store i32 42, ptr @v
ret void
}
4 changes: 2 additions & 2 deletions llvm/test/Object/mangle-ir.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ target datalayout = "m:o"
; CHECK-NOT: memcpy

define void @f() {
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* null, i64 0, i1 false)
tail call void @llvm.memcpy.p0.p0.i64(ptr null, ptr null, i64 0, i1 false)
ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)
8 changes: 4 additions & 4 deletions llvm/test/Object/objc-swift-mixed-imageinfo-macho.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@

target triple = "x86_64-apple-macosx10.15.0"

@llvm.used = appending global [1 x i8*] [i8* bitcast (i16* @__swift_reflection_version to i8*)], section "llvm.metadata", align 8
@llvm.used = appending global [1 x ptr] [ptr @__swift_reflection_version], section "llvm.metadata", align 8
@__swift_reflection_version = linkonce_odr hidden constant i16 3

define i32 @main(i32 %0, i8** %1) #0 {
%3 = bitcast i8** %1 to i8*
define i32 @main(i32 %0, ptr %1) #0 {
%3 = bitcast ptr %1 to ptr
ret i32 0
}

Expand All @@ -25,7 +25,7 @@ attributes #0 = { "frame-pointer"="all" "target-cpu"="penryn" "target-features"=
!1 = !{!"-lswiftSwiftOnoneSupport"}
!2 = !{!"-lswiftCore"}
!3 = !{!"-lobjc"}
!4 = !{[1 x i8*]* @llvm.used, null, null, i1 false, i1 true}
!4 = !{ptr @llvm.used, null, null, i1 false, i1 true}
!5 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 15]}
!6 = !{i32 1, !"Objective-C Version", i32 2}
!7 = !{i32 1, !"Objective-C Image Info Version", i32 0}
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/tools/llvm-readobj/ELF/unwind.test
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@

# CHECK: Program:
# CHECK-NEXT: DW_CFA_def_cfa_offset: +16
# CHECK-NEXT: DW_CFA_advance_loc: 6
# CHECK-NEXT: DW_CFA_advance_loc: 6 to 0x4004a6
# CHECK-NEXT: DW_CFA_def_cfa_offset: +24
# CHECK-NEXT: DW_CFA_advance_loc: 10
# CHECK-NEXT: DW_CFA_advance_loc: 10 to 0x4004b0
# CHECK-NEXT: DW_CFA_def_cfa_expression: DW_OP_breg7 +8, DW_OP_breg16 +0, DW_OP_lit15, DW_OP_and, DW_OP_lit11, DW_OP_ge, DW_OP_lit3, DW_OP_shl, DW_OP_plus
# CHECK-NEXT: DW_CFA_nop:
# CHECK-NEXT: DW_CFA_nop:
Expand All @@ -110,12 +110,12 @@
# CHECK-NEXT: address_range: 0x10 (end : 0x4005c6)

# CHECK: Program:
# CHECK-NEXT: DW_CFA_advance_loc: 1
# CHECK-NEXT: DW_CFA_advance_loc: 1 to 0x4005b7
# CHECK-NEXT: DW_CFA_def_cfa_offset: +16
# CHECK-NEXT: DW_CFA_offset: reg6 -16
# CHECK-NEXT: DW_CFA_advance_loc: 3
# CHECK-NEXT: DW_CFA_advance_loc: 3 to 0x4005ba
# CHECK-NEXT: DW_CFA_def_cfa_register: reg6
# CHECK-NEXT: DW_CFA_advance_loc: 11
# CHECK-NEXT: DW_CFA_advance_loc: 11 to 0x4005c5
# CHECK-NEXT: DW_CFA_def_cfa: reg7 +8
# CHECK-NEXT: DW_CFA_nop:
# CHECK-NEXT: DW_CFA_nop:
Expand All @@ -126,15 +126,15 @@
# CHECK-NEXT: address_range: 0xc7f (end : 0x40124f)

# CHECK: Program:
# CHECK-NEXT: DW_CFA_advance_loc: 5
# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x4005d5
# CHECK-NEXT: DW_CFA_def_cfa: reg10 +0
# CHECK-NEXT: DW_CFA_advance_loc: 9
# CHECK-NEXT: DW_CFA_advance_loc: 9 to 0x4005de
# CHECK-NEXT: DW_CFA_expression: reg6 DW_OP_breg6 +0
# CHECK-NEXT: DW_CFA_advance_loc: 5
# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x4005e3
# CHECK-NEXT: DW_CFA_def_cfa_expression: DW_OP_breg6 -8, DW_OP_deref
# CHECK-NEXT: DW_CFA_advance_loc2: 3174
# CHECK-NEXT: DW_CFA_advance_loc2: 3174 to 0x401249
# CHECK-NEXT: DW_CFA_def_cfa: reg10 +0
# CHECK-NEXT: DW_CFA_advance_loc: 5
# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x40124e
# CHECK-NEXT: DW_CFA_def_cfa: reg7 +8
# CHECK-NEXT: DW_CFA_nop:
# CHECK-NEXT: DW_CFA_nop:
Expand All @@ -146,21 +146,21 @@
# CHECK-NEXT: address_range: 0x66 (end : 0x4012b6)

# CHECK: Program:
# CHECK-NEXT: DW_CFA_advance_loc: 1
# CHECK-NEXT: DW_CFA_advance_loc: 1 to 0x401251
# CHECK-NEXT: DW_CFA_def_cfa_offset: +16
# CHECK-NEXT: DW_CFA_offset: reg6 -16
# CHECK-NEXT: DW_CFA_advance_loc: 3
# CHECK-NEXT: DW_CFA_advance_loc: 3 to 0x401254
# CHECK-NEXT: DW_CFA_def_cfa_register: reg6
# CHECK-NEXT: DW_CFA_advance_loc: 2
# CHECK-NEXT: DW_CFA_advance_loc: 2 to 0x401256
# CHECK-NEXT: DW_CFA_offset: reg15 -24
# CHECK-NEXT: DW_CFA_advance_loc: 5
# CHECK-NEXT: DW_CFA_advance_loc: 5 to 0x40125b
# CHECK-NEXT: DW_CFA_offset: reg14 -32
# CHECK-NEXT: DW_CFA_advance_loc: 7
# CHECK-NEXT: DW_CFA_advance_loc: 7 to 0x401262
# CHECK-NEXT: DW_CFA_offset: reg13 -40
# CHECK-NEXT: DW_CFA_offset: reg12 -48
# CHECK-NEXT: DW_CFA_advance_loc: 8
# CHECK-NEXT: DW_CFA_advance_loc: 8 to 0x40126a
# CHECK-NEXT: DW_CFA_offset: reg3 -56
# CHECK-NEXT: DW_CFA_advance_loc1: 75
# CHECK-NEXT: DW_CFA_advance_loc1: 75 to 0x4012b5
# CHECK-NEXT: DW_CFA_def_cfa: reg7 +8
# CHECK-NEXT: DW_CFA_nop:
# CHECK-NEXT: DW_CFA_nop:
Expand Down
7 changes: 5 additions & 2 deletions llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ void PrinterContext<ELFT>::printEHFrame(const Elf_Shdr *EHFrameShdr) const {
reportError(std::move(E), ObjF.getFileName());

for (const dwarf::FrameEntry &Entry : EHFrame) {
std::optional<uint64_t> InitialLocation;
if (const dwarf::CIE *CIE = dyn_cast<dwarf::CIE>(&Entry)) {
W.startLine() << format("[0x%" PRIx64 "] CIE length=%" PRIu64 "\n",
Address + CIE->getOffset(), CIE->getLength());
Expand All @@ -214,8 +215,9 @@ void PrinterContext<ELFT>::printEHFrame(const Elf_Shdr *EHFrameShdr) const {
Address + FDE->getLinkedCIE()->getOffset());
W.indent();

InitialLocation = FDE->getInitialLocation();
W.startLine() << format("initial_location: 0x%" PRIx64 "\n",
FDE->getInitialLocation());
*InitialLocation);
W.startLine() << format(
"address_range: 0x%" PRIx64 " (end : 0x%" PRIx64 ")\n",
FDE->getAddressRange(),
Expand All @@ -227,7 +229,8 @@ void PrinterContext<ELFT>::printEHFrame(const Elf_Shdr *EHFrameShdr) const {
W.indent();
auto DumpOpts = DIDumpOptions();
DumpOpts.IsEH = true;
Entry.cfis().dump(W.getOStream(), DumpOpts, W.getIndentLevel());
Entry.cfis().dump(W.getOStream(), DumpOpts, W.getIndentLevel(),
InitialLocation);
W.unindent();
W.unindent();
W.getOStream() << "\n";
Expand Down
311 changes: 311 additions & 0 deletions openmp/runtime/src/kmp_collapse.cpp

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions openmp/runtime/src/kmp_collapse.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ enum loop_type_t : kmp_int32 {
loop_type_int64 = 7
};

// Defining loop types to handle special cases.
// Each enumerator names one shape of loop nest; the underlying type is fixed
// to kmp_int32.
enum nested_loop_type_t : kmp_int32 {
// Value 0: no special shape identified.
// NOTE(review): "unkown" looks like a misspelling of "unknown"; the name is
// left untouched because other code may refer to it -- confirm all users
// before renaming.
nested_loop_type_unkown = 0,
// Value 1: loop nest tagged as a lower triangular matrix.
nested_loop_type_lower_triangular_matrix = 1,
// Value 2: loop nest tagged as an upper triangular matrix.
nested_loop_type_upper_triangular_matrix = 2
};

/*!
@ingroup WORK_SHARING
* Describes the structure for rectangular nested loops.
Expand Down Expand Up @@ -124,14 +131,14 @@ struct bounds_info_t {
// It's represented in kmp_uint64, but each dimension is calculated in
// that loop IV type. Also dimensions have to be converted to those types
// when used in generated code.
typedef kmp_uint64* kmp_point_t;
typedef kmp_uint64 *kmp_point_t;

// Array: Number of loop iterations on each nesting level to achieve some point,
// in expanded space or in original space.
// OMPTODO: move from using iterations to using offsets (iterations multiplied
// by steps). For those we need to be careful with the types, as step can be
// negative, but it'll remove multiplications and divisions in several places.
typedef kmp_loop_nest_iv_t* kmp_iterations_t;
typedef kmp_loop_nest_iv_t *kmp_iterations_t;

// Internal struct with additional info:
template <typename T> struct bounds_info_internalXX_template {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// RUN: %libomp-compile-and-run
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "omp.h"

/* Exclusive upper limit of the bounds passed to test() by main(); also the
   side length used to size execution_count. Can be overridden at compile
   time by defining MAX_BOUND. */
#ifndef MAX_BOUND
#define MAX_BOUND 64
#endif
/* The per-thread load-balance check at the end of test() is compiled out
   whenever _MSC_VER is not defined. */
#ifndef _MSC_VER
#define NO_EFFICIENCY_CHECK
#endif

/* Correctness bookkeeping: one counter per (i, j) pair recording how many
   times that iteration was executed. Valid iterations must end up at exactly
   1 and all others at 0. Allocated in main(). */
unsigned *execution_count = NULL;
/* Stores the number of iterations executed by each thread, indexed by
   omp_get_thread_num(). Allocated in main(). */
unsigned *iterations_per_thread = NULL;

/* Allocates an uninitialized bound1 x bound2 array of unsigned counters.
   Returns a pointer to the storage, or NULL if the allocation fails or the
   requested byte count does not fit in size_t. Release with Free(). */
unsigned *Alloc(unsigned bound1, unsigned bound2) {
  /* Reject sizes whose byte count would wrap around size_t. */
  const size_t max_elems = (size_t)-1 / sizeof(unsigned);
  if (bound2 != 0 && (size_t)bound1 > max_elems / (size_t)bound2)
    return NULL;
  /* Widen before multiplying: bound1 * bound2 evaluated in unsigned would
     wrap before being converted to size_t. */
  return (unsigned *)malloc((size_t)bound1 * (size_t)bound2 *
                            sizeof(unsigned));
}

/* Sets the first bound1 * bound2 counters starting at p to zero.
   p must point to at least that many unsigned elements. */
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) {
  /* Compute the length in size_t so the element count cannot wrap in
     unsigned arithmetic before being scaled by the element size. */
  size_t bytes = (size_t)bound1 * (size_t)bound2 * sizeof(unsigned);
  memset(p, 0, bytes);
}

/* Releases a buffer previously returned by Alloc(). */
void Free(unsigned *p) {
  free(p);
}

/* Returns the address of element (i, j) in the row-major array p whose rows
   are bound2 elements long. */
unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) {
  unsigned flat = i * bound2 + j;
  return p + flat;
}

/* Runs a collapse(2) loop nest over the strictly lower triangle
   (0 <= j < i < upper_bound) with a static schedule and verifies that
     - every valid (i, j) pair was executed exactly once,
     - no pair outside the triangle was executed, and
     - unless NO_EFFICIENCY_CHECK is defined, every thread executed between
       lower_per_chunk and upper_per_chunk iterations.
   Uses the global execution_count and iterations_per_thread arrays.
   Returns 1 on success and 0 on failure (after printing to stderr). */
int test(unsigned upper_bound) {

  /* Number of (i, j) pairs with 0 <= j < i < upper_bound. */
  unsigned total_iterations = upper_bound * (upper_bound - 1) / 2;
  unsigned num_threads = omp_get_max_threads();
  /* Bounds used by the efficiency check: floor and ceiling of
     total_iterations / num_threads. */
  unsigned lower_per_chunk = total_iterations / num_threads;
  unsigned upper_per_chunk =
      lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0);
  int i, j;

  omp_set_num_threads(num_threads);

  ZeroOut(execution_count, upper_bound, upper_bound);
  ZeroOut(iterations_per_thread, num_threads, 1);

#ifdef VERBOSE
  /* All arguments are unsigned, so they are printed with %u. */
  fprintf(stderr,
          "INFO: Using %6u threads for %6u outer iterations with %6u [%6u:%6u] "
          "chunks "
          "loop type lower triangle <,< - ",
          num_threads, upper_bound, total_iterations, lower_per_chunk,
          upper_per_chunk);
#endif

#pragma omp parallel shared(iterations_per_thread, execution_count)
  { /* begin of parallel */
    /* Strictly lower triangular execution_count matrix */
#pragma omp for schedule(static) collapse(2)
    for (i = 0; i < upper_bound; i++) {
      for (j = 0; j < i; j++) {
        /* Each thread only touches its own slot of iterations_per_thread. */
        (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++;
        (*Index(execution_count, i, j, upper_bound))++;
      }
    } /* end of for*/
  } /* end of parallel */

  /* check the execution_count array */
  for (i = 0; i < upper_bound; i++) {
    for (j = 0; j < i; j++) {
      unsigned value = *Index(execution_count, i, j, upper_bound);
      /* iterations with j < i are valid and must have been executed exactly
         once */
      if (value != 1) {
        fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %u times.\n",
                i, j, value);
        return 0;
      }
    }
    for (j = i; j < upper_bound; j++) {
      unsigned value = *Index(execution_count, i, j, upper_bound);
      /* iterations with j >= i are invalid and must not have been executed */
      if (value > 0) {
        fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %u times.\n",
                i, j, value);
        return 0;
      }
    }
  }

#ifndef NO_EFFICIENCY_CHECK
  /* Ensure the number of iterations executed by each thread is within bounds */
  for (i = 0; i < num_threads; i++) {
    unsigned value = *Index(iterations_per_thread, i, 0, 1);
    if (value < lower_per_chunk || value > upper_per_chunk) {
      fprintf(stderr,
              "ERROR: Inefficient Collapse thread %d of %u assigned %u "
              "iterations; must be between %u and %u\n",
              i, num_threads, value, lower_per_chunk, upper_per_chunk);
      return 0;
    }
  }
#endif
#ifdef VERBOSE
  fprintf(stderr, "PASSED\r\n");
#endif
  return 1;
}

/* Allocates the bookkeeping arrays, runs test() for every bound in
   [0, MAX_BOUND) and stops at the first failure.
   Returns 0 if every bound passes, 1 if a test fails or the arrays cannot be
   allocated. The arrays are released on every path. */
int main() {
  int status = 0;

  execution_count = Alloc(MAX_BOUND, MAX_BOUND);
  iterations_per_thread = Alloc(omp_get_max_threads(), 1);

  if (execution_count == NULL || iterations_per_thread == NULL) {
    /* test() would dereference these buffers, so bail out instead. */
    fprintf(stderr, "ERROR: failed to allocate bookkeeping arrays.\n");
    status = 1;
  } else {
    for (unsigned j = 0; j < MAX_BOUND; j++) {
      if (!test(j)) {
        status = 1;
        break;
      }
    }
  }

  /* Free() forwards to free(), which accepts NULL. */
  Free(execution_count);
  Free(iterations_per_thread);
  return status;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// RUN: %libomp-compile-and-run
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "omp.h"

/* Exclusive upper limit of the bounds passed to test() by main(); also the
   side length used to size execution_count. Can be overridden at compile
   time by defining MAX_BOUND. */
#ifndef MAX_BOUND
#define MAX_BOUND 64
#endif
/* The per-thread load-balance check at the end of test() is compiled out
   whenever _MSC_VER is not defined. */
#ifndef _MSC_VER
#define NO_EFFICIENCY_CHECK
#endif

/* Correctness bookkeeping: one counter per (i, j) pair recording how many
   times that iteration was executed. Valid iterations must end up at exactly
   1 and all others at 0. Allocated in main(). */
unsigned *execution_count = NULL;
/* Stores the number of iterations executed by each thread, indexed by
   omp_get_thread_num(). Allocated in main(). */
unsigned *iterations_per_thread = NULL;

/* Allocates an uninitialized bound1 x bound2 array of unsigned counters.
   Returns a pointer to the storage, or NULL if the allocation fails or the
   requested byte count does not fit in size_t. Release with Free(). */
unsigned *Alloc(unsigned bound1, unsigned bound2) {
  /* Reject sizes whose byte count would wrap around size_t. */
  const size_t max_elems = (size_t)-1 / sizeof(unsigned);
  if (bound2 != 0 && (size_t)bound1 > max_elems / (size_t)bound2)
    return NULL;
  /* Widen before multiplying: bound1 * bound2 evaluated in unsigned would
     wrap before being converted to size_t. */
  return (unsigned *)malloc((size_t)bound1 * (size_t)bound2 *
                            sizeof(unsigned));
}

/* Sets the first bound1 * bound2 counters starting at p to zero.
   p must point to at least that many unsigned elements. */
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) {
  /* Compute the length in size_t so the element count cannot wrap in
     unsigned arithmetic before being scaled by the element size. */
  size_t bytes = (size_t)bound1 * (size_t)bound2 * sizeof(unsigned);
  memset(p, 0, bytes);
}

/* Releases a buffer previously returned by Alloc(). */
void Free(unsigned *p) {
  free(p);
}

/* Returns the address of element (i, j) in the row-major array p whose rows
   are bound2 elements long. */
unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) {
  unsigned flat = i * bound2 + j;
  return p + flat;
}

/* Runs a collapse(2) loop nest over the lower triangle including the
   diagonal (0 <= j <= i < upper_bound) with a static schedule and verifies
   that
     - every valid (i, j) pair was executed exactly once,
     - no pair outside the triangle was executed, and
     - unless NO_EFFICIENCY_CHECK is defined, every thread executed between
       lower_per_chunk and upper_per_chunk iterations.
   Uses the global execution_count and iterations_per_thread arrays.
   Returns 1 on success and 0 on failure (after printing to stderr). */
int test(int upper_bound) {

  /* Number of (i, j) pairs with 0 <= j <= i < upper_bound. */
  unsigned total_iterations = upper_bound * (upper_bound + 1) / 2;
  unsigned num_threads = omp_get_max_threads();
  /* Bounds used by the efficiency check: floor and ceiling of
     total_iterations / num_threads. */
  unsigned lower_per_chunk = total_iterations / num_threads;
  unsigned upper_per_chunk =
      lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0);
  int i, j;

  omp_set_num_threads(num_threads);

  ZeroOut(execution_count, upper_bound, upper_bound);
  ZeroOut(iterations_per_thread, num_threads, 1);

#ifdef VERBOSE
  /* upper_bound is an int (%d); the remaining arguments are unsigned (%u). */
  fprintf(stderr,
          "INFO: Using %6u threads for %6d outer iterations with %6u [%6u:%6u] "
          "chunks "
          "loop type lower triangle <,<= - ",
          num_threads, upper_bound, total_iterations, lower_per_chunk,
          upper_per_chunk);
#endif

#pragma omp parallel shared(iterations_per_thread, execution_count)
  { /* begin of parallel */
    /* Lower triangular execution_count matrix (diagonal included) */
#pragma omp for schedule(static) collapse(2)
    for (i = 0; i < upper_bound; i++) {
      for (j = 0; j <= i; j++) {
        /* Each thread only touches its own slot of iterations_per_thread. */
        (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++;
        (*Index(execution_count, i, j, upper_bound))++;
      }
    } /* end of for*/
  } /* end of parallel */

  /* check the execution_count array */
  for (i = 0; i < upper_bound; i++) {
    for (j = 0; j <= i; j++) {
      unsigned value = *Index(execution_count, i, j, upper_bound);
      /* iterations with j <= i are valid and must have been executed exactly
         once */
      if (value != 1) {
        fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %u times.\n",
                i, j, value);
        return 0;
      }
    }
    for (j = i + 1; j < upper_bound; j++) {
      unsigned value = *Index(execution_count, i, j, upper_bound);
      /* iterations with j > i are invalid and must not have been executed */
      if (value > 0) {
        fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %u times.\n",
                i, j, value);
        return 0;
      }
    }
  }

#ifndef NO_EFFICIENCY_CHECK
  /* Ensure the number of iterations executed by each thread is within bounds */
  for (i = 0; i < num_threads; i++) {
    unsigned value = *Index(iterations_per_thread, i, 0, 1);
    if (value < lower_per_chunk || value > upper_per_chunk) {
      fprintf(stderr,
              "ERROR: Inefficient Collapse thread %d of %u assigned %u "
              "iterations; must be between %u and %u\n",
              i, num_threads, value, lower_per_chunk, upper_per_chunk);
      return 0;
    }
  }
#endif
#ifdef VERBOSE
  fprintf(stderr, "PASSED\r\n");
#endif
  return 1;
}

/* Allocates the bookkeeping arrays, runs test() for every bound in
   [0, MAX_BOUND) and stops at the first failure.
   Returns 0 if every bound passes, 1 if a test fails or the arrays cannot be
   allocated. The arrays are released on every path. */
int main() {
  int status = 0;

  execution_count = Alloc(MAX_BOUND, MAX_BOUND);
  iterations_per_thread = Alloc(omp_get_max_threads(), 1);

  if (execution_count == NULL || iterations_per_thread == NULL) {
    /* test() would dereference these buffers, so bail out instead. */
    fprintf(stderr, "ERROR: failed to allocate bookkeeping arrays.\n");
    status = 1;
  } else {
    for (unsigned j = 0; j < MAX_BOUND; j++) {
      if (!test(j)) {
        status = 1;
        break;
      }
    }
  }

  /* Free() forwards to free(), which accepts NULL. */
  Free(execution_count);
  Free(iterations_per_thread);
  return status;
}
124 changes: 124 additions & 0 deletions openmp/runtime/test/worksharing/for/omp_for_collapse_UpperTriangular.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// RUN: %libomp-compile-and-run
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "omp.h"

/* Exclusive upper limit of the bounds passed to test() by main(); also the
   side length used to size execution_count. Can be overridden at compile
   time by defining MAX_BOUND. */
#ifndef MAX_BOUND
#define MAX_BOUND 64
#endif
/* The per-thread load-balance check at the end of test() is compiled out
   whenever _MSC_VER is not defined. */
#ifndef _MSC_VER
#define NO_EFFICIENCY_CHECK
#endif

/* Correctness bookkeeping: one counter per (i, j) pair recording how many
   times that iteration was executed. Valid iterations must end up at exactly
   1 and all others at 0. Allocated in main(). */
unsigned *execution_count = NULL;
/* Stores the number of iterations executed by each thread, indexed by
   omp_get_thread_num(). Allocated in main(). */
unsigned *iterations_per_thread = NULL;

/* Allocates an uninitialized bound1 x bound2 array of unsigned counters.
   Returns a pointer to the storage, or NULL if the allocation fails or the
   requested byte count does not fit in size_t. Release with Free(). */
unsigned *Alloc(unsigned bound1, unsigned bound2) {
  /* Reject sizes whose byte count would wrap around size_t. */
  const size_t max_elems = (size_t)-1 / sizeof(unsigned);
  if (bound2 != 0 && (size_t)bound1 > max_elems / (size_t)bound2)
    return NULL;
  /* Widen before multiplying: bound1 * bound2 evaluated in unsigned would
     wrap before being converted to size_t. */
  return (unsigned *)malloc((size_t)bound1 * (size_t)bound2 *
                            sizeof(unsigned));
}

/* Sets the first bound1 * bound2 counters starting at p to zero.
   p must point to at least that many unsigned elements. */
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) {
  /* Compute the length in size_t so the element count cannot wrap in
     unsigned arithmetic before being scaled by the element size. */
  size_t bytes = (size_t)bound1 * (size_t)bound2 * sizeof(unsigned);
  memset(p, 0, bytes);
}

/* Releases a buffer previously returned by Alloc(). */
void Free(unsigned *p) {
  free(p);
}

/* Returns the address of element (i, j) in the row-major array p whose rows
   are bound2 elements long. */
unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) {
  unsigned flat = i * bound2 + j;
  return p + flat;
}

/* Runs a collapse(2) loop nest over the upper triangle including the
   diagonal (0 <= i <= j < upper_bound) with a static schedule and verifies
   that
     - every valid (i, j) pair was executed exactly once,
     - no pair outside the triangle was executed, and
     - unless NO_EFFICIENCY_CHECK is defined, every thread executed between
       lower_per_chunk and upper_per_chunk iterations.
   Uses the global execution_count and iterations_per_thread arrays.
   Returns 1 on success and 0 on failure (after printing to stderr). */
int test(unsigned upper_bound) {

  /* Number of (i, j) pairs with 0 <= i <= j < upper_bound. */
  unsigned total_iterations = upper_bound * (upper_bound + 1) / 2;
  unsigned num_threads = omp_get_max_threads();
  /* Bounds used by the efficiency check: floor and ceiling of
     total_iterations / num_threads. */
  unsigned lower_per_chunk = total_iterations / num_threads;
  unsigned upper_per_chunk =
      lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0);
  int i, j;

  omp_set_num_threads(num_threads);

  ZeroOut(execution_count, upper_bound, upper_bound);
  ZeroOut(iterations_per_thread, num_threads, 1);

#ifdef VERBOSE
  /* All arguments are unsigned, so they are printed with %u. */
  fprintf(stderr,
          "INFO: Using %6u threads for %6u outer iterations with %6u [%6u:%6u] "
          "chunks "
          "loop type upper triangle <,< - ",
          num_threads, upper_bound, total_iterations, lower_per_chunk,
          upper_per_chunk);
#endif

#pragma omp parallel shared(iterations_per_thread, execution_count)
  { /* begin of parallel */
    /* Upper triangular execution_count matrix (diagonal included) */
#pragma omp for schedule(static) collapse(2)
    for (i = 0; i < upper_bound; i++) {
      for (j = i; j < upper_bound; j++) {
        /* Each thread only touches its own slot of iterations_per_thread. */
        (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++;
        (*Index(execution_count, i, j, upper_bound))++;
      }
    } /* end of for*/
  } /* end of parallel */

  /* check the execution_count array */
  for (i = 0; i < upper_bound; i++) {
    for (j = i; j < upper_bound; j++) {
      unsigned value = *Index(execution_count, i, j, upper_bound);
      /* iterations with j >= i are valid and must have been executed exactly
         once */
      if (value != 1) {
        fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %u times.\n",
                i, j, value);
        return 0;
      }
    }
    for (j = 0; j < i; j++) {
      unsigned value = *Index(execution_count, i, j, upper_bound);
      /* iterations with j < i are invalid and must not have been executed */
      if (value > 0) {
        fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %u times.\n",
                i, j, value);
        return 0;
      }
    }
  }

#ifndef NO_EFFICIENCY_CHECK
  /* Ensure the number of iterations executed by each thread is within bounds */
  for (i = 0; i < num_threads; i++) {
    unsigned value = *Index(iterations_per_thread, i, 0, 1);
    if (value < lower_per_chunk || value > upper_per_chunk) {
      fprintf(stderr,
              "ERROR: Inefficient Collapse thread %d of %u assigned %u "
              "iterations; must be between %u and %u\n",
              i, num_threads, value, lower_per_chunk, upper_per_chunk);
      return 0;
    }
  }
#endif
#ifdef VERBOSE
  fprintf(stderr, "PASSED\r\n");
#endif
  return 1;
}

/* Allocates the bookkeeping arrays, runs test() for every bound in
   [0, MAX_BOUND) and stops at the first failure.
   Returns 0 if every bound passes, 1 if a test fails or the arrays cannot be
   allocated. The arrays are released on every path. */
int main() {
  int status = 0;

  execution_count = Alloc(MAX_BOUND, MAX_BOUND);
  iterations_per_thread = Alloc(omp_get_max_threads(), 1);

  if (execution_count == NULL || iterations_per_thread == NULL) {
    /* test() would dereference these buffers, so bail out instead. */
    fprintf(stderr, "ERROR: failed to allocate bookkeeping arrays.\n");
    status = 1;
  } else {
    for (unsigned j = 0; j < MAX_BOUND; j++) {
      if (!test(j)) {
        status = 1;
        break;
      }
    }
  }

  /* Free() forwards to free(), which accepts NULL. */
  Free(execution_count);
  Free(iterations_per_thread);
  return status;
}