Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Recursive function whose pointer argument %x is only ever loaded (at offset
; 0). The CHECK lines show argpromotion replacing the ptr with its loaded i32
; value (%x.0.val) and rewriting both recursive call sites, including the one
; that feeds the loaded value back in as the third argument.
define internal i32 @foo(ptr %x, i32 %n, i32 %m) {
; CHECK-LABEL: define internal i32 @foo(
; CHECK-SAME: i32 [[X_0_VAL:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
; CHECK: [[COND_TRUE]]:
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[COND_FALSE]]:
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL]], i32 [[X_0_VAL]])
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL2]], i32 [[M]])
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[COND_NEXT:.*]]:
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[X_0_VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
%cmp = icmp ne i32 %n, 0
br i1 %cmp, label %cond_true, label %cond_false

cond_true: ; preds = %entry
%val = load i32, ptr %x, align 4
br label %return

cond_false: ; preds = %entry
%val2 = load i32, ptr %x, align 4
%subval = sub i32 %n, 1
%callret = call i32 @foo(ptr %x, i32 %subval, i32 %val2)
%subval2 = sub i32 %n, 2
%callret2 = call i32 @foo(ptr %x, i32 %subval2, i32 %m)
%cmp2 = add i32 %callret, %callret2
br label %return

; Deliberately unreachable block; it supplies the poison incoming value of the
; return phi.
cond_next: ; No predecessors!
br label %return

return: ; preds = %cond_next, %cond_false, %cond_true
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
ret i32 %retval.0
}

; External caller of the promoted @foo. The CHECK lines show the load of %x
; being hoisted into this call site so the promoted callee can take the i32
; value directly; the align(4)/dereferenceable(4) attributes make the hoisted
; load legal here.
define i32 @bar(ptr align(4) dereferenceable(4) %x, i32 %n, i32 %m) {
; CHECK-LABEL: define i32 @bar(
; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo(i32 [[X_VAL]], i32 [[N]], i32 [[M]])
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: ret i32 [[CALLRET3]]
;
entry:
%callret3 = call i32 @foo(ptr %x, i32 %n, i32 %m)
br label %return

return: ; preds = %entry
ret i32 %callret3
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Recursive function where the second recursive call swaps its pointer
; arguments (passes %y where %x is expected and vice versa). The CHECK lines
; show that argpromotion leaves both ptr arguments unpromoted in this case.
define internal i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m) {
; CHECK-LABEL: define internal i32 @foo(
; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
; CHECK: [[COND_TRUE]]:
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[COND_FALSE]]:
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: [[VAL3:%.*]] = load i32, ptr [[Y]], align 4
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], [[VAL3]]
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[SUBVAL]], i32 [[VAL2]])
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo(ptr [[Y]], ptr [[X]], i32 [[SUBVAL2]], i32 [[M]])
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[COND_NEXT:.*]]:
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
%cmp = icmp ne i32 %n, 0
br i1 %cmp, label %cond_true, label %cond_false

cond_true: ; preds = %entry
%val = load i32, ptr %x, align 4
br label %return

cond_false: ; preds = %entry
%val2 = load i32, ptr %x, align 4
%val3 = load i32, ptr %y, align 4
%subval = sub i32 %n, %val3
%callret = call i32 @foo(ptr %x, ptr %y, i32 %subval, i32 %val2)
%subval2 = sub i32 %n, 2
; Arguments intentionally swapped relative to the parameter order.
%callret2 = call i32 @foo(ptr %y, ptr %x, i32 %subval2, i32 %m)
%cmp2 = add i32 %callret, %callret2
br label %return

; Deliberately unreachable block; it supplies the poison incoming value of the
; return phi.
cond_next: ; No predecessors!
br label %return

return: ; preds = %cond_next, %cond_false, %cond_true
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
ret i32 %retval.0
}

; External caller. Because @foo is not promoted (its recursion swaps pointer
; arguments), the CHECK lines show this call site left unchanged.
define i32 @bar(ptr align(4) dereferenceable(4) %x, ptr align(4) dereferenceable(4) %y, i32 %n, i32 %m) {
; CHECK-LABEL: define i32 @bar(
; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], ptr align 4 dereferenceable(4) [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[N]], i32 [[M]])
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: ret i32 [[CALLRET3]]
;
entry:
%callret3 = call i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m)
br label %return

return: ; preds = %entry
ret i32 %callret3
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Simple non-recursive helper: %x is only loaded, so the CHECK lines show it
; promoted to an i32 value (%x.0.val).
define internal i32 @zoo(ptr %x, i32 %m) {
; CHECK-LABEL: define internal i32 @zoo(
; CHECK-SAME: i32 [[X_0_VAL:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[RESZOO:%.*]] = add i32 [[X_0_VAL]], [[M]]
; CHECK-NEXT: ret i32 [[X_0_VAL]]
;
%valzoo = load i32, ptr %x, align 4
%reszoo = add i32 %valzoo, %m
ret i32 %valzoo
}

; Mixed promotion test: the CHECK lines show %y promoted to i32 %y.0.val while
; %x stays a ptr. %x is additionally forwarded to the promoted @zoo, so a load
; of %x (%x.val) is inserted before that call to satisfy @zoo's new i32
; parameter.
define internal i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m) {
; CHECK-LABEL: define internal i32 @foo(
; CHECK-SAME: ptr [[X:%.*]], i32 [[Y_0_VAL:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
; CHECK: [[COND_TRUE]]:
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[COND_FALSE]]:
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], [[Y_0_VAL]]
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo(ptr [[X]], i32 [[Y_0_VAL]], i32 [[SUBVAL]], i32 [[VAL2]])
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo(ptr [[X]], i32 [[Y_0_VAL]], i32 [[SUBVAL2]], i32 [[M]])
; CHECK-NEXT: [[CMP1:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: [[CALLRETFINAL:%.*]] = call i32 @zoo(i32 [[X_VAL]], i32 [[M]])
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CMP1]], [[CALLRETFINAL]]
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[COND_NEXT:.*]]:
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
%cmp = icmp ne i32 %n, 0
br i1 %cmp, label %cond_true, label %cond_false

cond_true: ; preds = %entry
%val = load i32, ptr %x, align 4
br label %return

cond_false: ; preds = %entry
%val2 = load i32, ptr %x, align 4
%val3 = load i32, ptr %y, align 4
%subval = sub i32 %n, %val3
%callret = call i32 @foo(ptr %x, ptr %y, i32 %subval, i32 %val2)
%subval2 = sub i32 %n, 2
%callret2 = call i32 @foo(ptr %x, ptr %y, i32 %subval2, i32 %m)
%cmp1 = add i32 %callret, %callret2
; %x escapes into another promotable callee here.
%callretfinal = call i32 @zoo(ptr %x, i32 %m)
%cmp2 = add i32 %cmp1, %callretfinal
br label %return

; Deliberately unreachable block; it supplies the poison incoming value of the
; return phi.
cond_next: ; No predecessors!
br label %return

return: ; preds = %cond_next, %cond_false, %cond_true
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
ret i32 %retval.0
}

; External caller. Matching the partial promotion of @foo, the CHECK lines
; show only %y's load being hoisted here (%y.val); %x is still passed as a ptr.
define i32 @bar(ptr align(4) dereferenceable(4) %x, ptr align(4) dereferenceable(4) %y, i32 %n, i32 %m) {
; CHECK-LABEL: define i32 @bar(
; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], ptr align 4 dereferenceable(4) [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[Y_VAL:%.*]] = load i32, ptr [[Y]], align 4
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo(ptr [[X]], i32 [[Y_VAL]], i32 [[N]], i32 [[M]])
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: ret i32 [[CALLRET3]]
;
entry:
%callret3 = call i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m)
br label %return

return: ; preds = %entry
ret i32 %callret3
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Recursive function whose pointer argument %x is only ever loaded; the CHECK
; lines show it promoted to i32 %x.0.val through both recursive call sites.
; Paired with @bar below, which reaches %x through a zero-offset GEP.
define internal i32 @foo(ptr %x, i32 %n, i32 %m) {
; CHECK-LABEL: define internal i32 @foo(
; CHECK-SAME: i32 [[X_0_VAL:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
; CHECK: [[COND_TRUE]]:
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[COND_FALSE]]:
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL]], i32 [[X_0_VAL]])
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL2]], i32 [[M]])
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[COND_NEXT:.*]]:
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[X_0_VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
%cmp = icmp ne i32 %n, 0
br i1 %cmp, label %cond_true, label %cond_false

cond_true: ; preds = %entry
%val = load i32, ptr %x, align 4
br label %return

cond_false: ; preds = %entry
%val2 = load i32, ptr %x, align 4
%subval = sub i32 %n, 1
%callret = call i32 @foo(ptr %x, i32 %subval, i32 %val2)
%subval2 = sub i32 %n, 2
%callret2 = call i32 @foo(ptr %x, i32 %subval2, i32 %m)
%cmp2 = add i32 %callret, %callret2
br label %return

; Deliberately unreachable block; it supplies the poison incoming value of the
; return phi.
cond_next: ; No predecessors!
br label %return

return: ; preds = %cond_next, %cond_false, %cond_true
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
ret i32 %retval.0
}

; Caller passing a zero-offset GEP of %x. The CHECK lines show promotion still
; succeeding: a load through the GEP (%gepval.val) is inserted at the call
; site and @foo receives the i32 value.
define i32 @bar(ptr align(4) dereferenceable(4) %x, i32 %n, i32 %m) {
; CHECK-LABEL: define i32 @bar(
; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GEPVAL:%.*]] = getelementptr ptr, ptr [[X]], i32 0
; CHECK-NEXT: [[GEPVAL_VAL:%.*]] = load i32, ptr [[GEPVAL]], align 4
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo(i32 [[GEPVAL_VAL]], i32 [[N]], i32 [[M]])
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: ret i32 [[CALLRET3]]
;
entry:
%gepval = getelementptr ptr, ptr %x, i32 0
%callret3 = call i32 @foo(ptr %gepval, i32 %n, i32 %m)
br label %return

return: ; preds = %entry
ret i32 %callret3
}

; Same body as @foo above, but its caller @bar2 passes a non-zero-offset GEP.
; The CHECK lines show @foo2's pointer argument left unpromoted — presumably
; the non-zero offset at the call site blocks promotion (contrast with @foo /
; @bar, where the offset-0 GEP still allowed it).
define internal i32 @foo2(ptr %x, i32 %n, i32 %m) {
; CHECK-LABEL: define internal i32 @foo2(
; CHECK-SAME: ptr [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
; CHECK: [[COND_TRUE]]:
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[COND_FALSE]]:
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo2(ptr [[X]], i32 [[SUBVAL]], i32 [[VAL2]])
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo2(ptr [[X]], i32 [[SUBVAL2]], i32 [[M]])
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[COND_NEXT:.*]]:
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
%cmp = icmp ne i32 %n, 0
br i1 %cmp, label %cond_true, label %cond_false

cond_true: ; preds = %entry
%val = load i32, ptr %x, align 4
br label %return

cond_false: ; preds = %entry
%val2 = load i32, ptr %x, align 4
%subval = sub i32 %n, 1
%callret = call i32 @foo2(ptr %x, i32 %subval, i32 %val2)
%subval2 = sub i32 %n, 2
%callret2 = call i32 @foo2(ptr %x, i32 %subval2, i32 %m)
%cmp2 = add i32 %callret, %callret2
br label %return

; Deliberately unreachable block; it supplies the poison incoming value of the
; return phi.
cond_next: ; No predecessors!
br label %return

return: ; preds = %cond_next, %cond_false, %cond_true
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
ret i32 %retval.0
}

; Caller passing a non-zero-offset GEP of %x. The CHECK lines show the call to
; @foo2 unchanged (no promotion, no hoisted load).
define i32 @bar2(ptr align(4) dereferenceable(4) %x, i32 %n, i32 %m) {
; CHECK-LABEL: define i32 @bar2(
; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GEPVAL:%.*]] = getelementptr ptr, ptr [[X]], i32 4
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo2(ptr [[GEPVAL]], i32 [[N]], i32 [[M]])
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: ret i32 [[CALLRET3]]
;
entry:
%gepval = getelementptr ptr, ptr %x, i32 4
%callret3 = call i32 @foo2(ptr %gepval, i32 %n, i32 %m)
br label %return

return: ; preds = %entry
ret i32 %callret3
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
; Recursive function where the second recursive call passes %x for both
; pointer parameters (aliasing arguments). The CHECK lines show argpromotion
; leaving both ptr arguments unpromoted in this case.
define internal i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m) {
; CHECK-LABEL: define internal i32 @foo(
; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
; CHECK: [[COND_TRUE]]:
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[COND_FALSE]]:
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[X]], align 4
; CHECK-NEXT: [[VAL3:%.*]] = load i32, ptr [[Y]], align 4
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], [[VAL3]]
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[SUBVAL]], i32 [[VAL2]])
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo(ptr [[X]], ptr [[X]], i32 [[SUBVAL2]], i32 [[M]])
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[COND_NEXT:.*]]:
; CHECK-NEXT: br label %[[RETURN]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
;
entry:
%cmp = icmp ne i32 %n, 0
br i1 %cmp, label %cond_true, label %cond_false

cond_true: ; preds = %entry
%val = load i32, ptr %x, align 4
br label %return

cond_false: ; preds = %entry
%val2 = load i32, ptr %x, align 4
%val3 = load i32, ptr %y, align 4
%subval = sub i32 %n, %val3
%callret = call i32 @foo(ptr %x, ptr %y, i32 %subval, i32 %val2)
%subval2 = sub i32 %n, 2
; %x passed for both pointer parameters: the arguments alias.
%callret2 = call i32 @foo(ptr %x, ptr %x, i32 %subval2, i32 %m)
%cmp2 = add i32 %callret, %callret2
br label %return

; Deliberately unreachable block; it supplies the poison incoming value of the
; return phi.
cond_next: ; No predecessors!
br label %return

return: ; preds = %cond_next, %cond_false, %cond_true
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
ret i32 %retval.0
}

; External caller. Because @foo is not promoted (its recursion passes aliasing
; pointer arguments), the CHECK lines show this call site left unchanged.
define i32 @bar(ptr align(4) dereferenceable(4) %x, ptr align(4) dereferenceable(4) %y, i32 %n, i32 %m) {
; CHECK-LABEL: define i32 @bar(
; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], ptr align 4 dereferenceable(4) [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[N]], i32 [[M]])
; CHECK-NEXT: br label %[[RETURN:.*]]
; CHECK: [[RETURN]]:
; CHECK-NEXT: ret i32 [[CALLRET3]]
;
entry:
%callret3 = call i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m)
br label %return

return: ; preds = %entry
ret i32 %callret3
}
280 changes: 280 additions & 0 deletions mlir/include/mlir/Support/CyclicReplacerCache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
//===- CyclicReplacerCache.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains helper classes for caching replacer-like functions that
// map values between two domains. They are able to handle replacer logic that
// contains self-recursion.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_SUPPORT_CYCLICREPLACERCACHE_H
#define MLIR_SUPPORT_CYCLICREPLACERCACHE_H

#include "mlir/IR/Visitors.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include <set>

namespace mlir {

//===----------------------------------------------------------------------===//
// CyclicReplacerCache
//===----------------------------------------------------------------------===//

/// A cache for replacer-like functions that map values between two domains. The
/// difference compared to just using a map to cache in-out pairs is that this
/// class is able to handle replacer logic that is self-recursive (and thus may
/// cause infinite recursion in the naive case).
///
/// This class provides a hook for the user to perform cycle pruning when a
/// cycle is identified, and is able to perform context-sensitive caching so
/// that the replacement result for an input that is part of a pruned cycle can
/// be distinct from the replacement result for the same input when it is not
/// part of a cycle.
///
/// In addition, this class allows deferring cycle pruning until specific inputs
/// are repeated. This is useful for cases where not all elements in a cycle can
/// perform pruning. The user still must guarantee that at least one element in
/// any given cycle can perform pruning. Even if not, an assertion will
/// eventually be tripped instead of infinite recursion (the run-time is
/// linearly bounded by the maximum cycle length of its input).
///
/// WARNING: This class works best with InT & OutT that are trivial scalar
/// types. The input/output elements will be frequently copied and hashed.
template <typename InT, typename OutT>
class CyclicReplacerCache {
public:
  /// User-provided cycle-breaking function.
  /// Given an input element that was revisited mid-replacement (i.e. a cycle),
  /// it may return a replacement that cuts the cycle, or std::nullopt to
  /// decline. The cycle-breaking function must not make any more recursive
  /// invocations to this cached replacer.
  using CycleBreakerFn = std::function<std::optional<OutT>(InT)>;

  CyclicReplacerCache() = delete;
  CyclicReplacerCache(CycleBreakerFn cycleBreaker)
      : cycleBreaker(std::move(cycleBreaker)) {}

  /// A possibly unresolved cache entry.
  /// If unresolved, the entry must be resolved before it goes out of scope.
  struct CacheEntry {
  public:
    // An entry handed out by lookupOrInit() opened a replacement frame;
    // destroying it without resolve() would leave the cache's internal stack
    // inconsistent, hence the assertion.
    ~CacheEntry() { assert(result && "unresolved cache entry"); }

    /// Check whether this node was repeated during recursive replacements.
    /// This only makes sense to be called after all recursive replacements are
    /// completed and the current element has resurfaced to the top of the
    /// replacement stack.
    bool wasRepeated() const {
      // If the top frame includes itself as a dependency, then it must have
      // been repeated.
      ReplacementFrame &currFrame = cache.replacementStack.back();
      size_t currFrameIndex = cache.replacementStack.size() - 1;
      return currFrame.dependentFrames.count(currFrameIndex);
    }

    /// Resolve an unresolved cache entry by providing the result to be stored
    /// in the cache.
    void resolve(OutT result) {
      assert(!this->result && "cache entry already resolved");
      // Register in the cache (and pop the replacement frame) before stashing
      // the result locally.
      cache.finalizeReplacement(element, result);
      this->result = std::move(result);
    }

    /// Get the resolved result if one exists.
    const std::optional<OutT> &get() const { return result; }

  private:
    friend class CyclicReplacerCache;
    CacheEntry() = delete;
    CacheEntry(CyclicReplacerCache<InT, OutT> &cache, InT element,
               std::optional<OutT> result = std::nullopt)
        : cache(cache), element(std::move(element)), result(result) {}

    /// The owning cache; entries are only valid while it is alive.
    CyclicReplacerCache<InT, OutT> &cache;
    /// The input element this entry is (or will be) the replacement for.
    InT element;
    /// The replacement, once resolved or found in the cache.
    std::optional<OutT> result;
  };

  /// Lookup the cache for a pre-calculated replacement for `element`.
  /// If one exists, a resolved CacheEntry will be returned. Otherwise, an
  /// unresolved CacheEntry will be returned, and the caller must resolve it
  /// with the calculated replacement so it can be registered in the cache for
  /// future use.
  /// Multiple unresolved CacheEntries may be retrieved. However, any unresolved
  /// CacheEntries that are returned must be resolved in reverse order of
  /// retrieval, i.e. the last retrieved CacheEntry must be resolved first, and
  /// the first retrieved CacheEntry must be resolved last. This should be
  /// natural when used as a stack / inside recursion.
  CacheEntry lookupOrInit(InT element);

private:
  /// Register the replacement in the cache and update the replacementStack.
  void finalizeReplacement(InT element, OutT result);

  /// User hook invoked when an element repeats on the replacement stack.
  CycleBreakerFn cycleBreaker;
  /// Context-free results: replacements valid regardless of what is currently
  /// on the replacement stack.
  DenseMap<InT, OutT> standaloneCache;

  struct DependentReplacement {
    OutT replacement;
    /// The highest replacement frame index that this cache entry is dependent
    /// on.
    size_t highestDependentFrame;
  };
  /// Context-sensitive results: replacements only valid while the frames they
  /// depend on remain on the replacement stack.
  DenseMap<InT, DependentReplacement> dependentCache;

  struct ReplacementFrame {
    /// The set of elements that is only legal while under this current frame.
    /// They need to be removed from the cache when this frame is popped off the
    /// replacement stack.
    DenseSet<InT> dependingReplacements;
    /// The set of frame indices that this current frame's replacement is
    /// dependent on, ordered from highest to lowest.
    std::set<size_t, std::greater<size_t>> dependentFrames;
  };
  /// Every element currently in the progress of being replaced pushes a frame
  /// onto this stack.
  SmallVector<ReplacementFrame> replacementStack;
  /// Maps from each input element to its indices on the replacement stack.
  DenseMap<InT, SmallVector<size_t, 2>> cyclicElementFrame;
  /// If set to true, we are currently asking an element to break a cycle. No
  /// more recursive invocations is allowed while this is true (the replacement
  /// stack can no longer grow).
  bool resolvingCycle = false;
};

// Look up `element`, first in the context-free cache, then in the
// context-sensitive cache, and finally fall back to opening a new replacement
// frame (or attempting cycle-breaking when `element` is already in flight).
template <typename InT, typename OutT>
typename CyclicReplacerCache<InT, OutT>::CacheEntry
CyclicReplacerCache<InT, OutT>::lookupOrInit(InT element) {
  assert(!resolvingCycle &&
         "illegal recursive invocation while breaking cycle");

  // Fast path: a context-free replacement is always valid.
  if (auto it = standaloneCache.find(element); it != standaloneCache.end())
    return CacheEntry(*this, element, it->second);

  if (auto it = dependentCache.find(element); it != dependentCache.end()) {
    // Update the current top frame (the element that invoked this current
    // replacement) to include any dependencies the cache entry had.
    ReplacementFrame &currFrame = replacementStack.back();
    currFrame.dependentFrames.insert(it->second.highestDependentFrame);
    return CacheEntry(*this, element, it->second.replacement);
  }

  // Track this element's occurrences on the replacement stack. A failed
  // insertion means the element is already being replaced, i.e. a cycle.
  auto [it, inserted] = cyclicElementFrame.try_emplace(element);
  if (!inserted) {
    // This is a repeat of a known element. Try to break cycle here.
    resolvingCycle = true;
    std::optional<OutT> result = cycleBreaker(element);
    resolvingCycle = false;
    if (result) {
      // Cycle was broken.
      size_t dependentFrame = it->second.back();
      // The result is only valid while the frame it depends on is alive, so
      // it goes into the context-sensitive cache.
      dependentCache[element] = {*result, dependentFrame};
      ReplacementFrame &currFrame = replacementStack.back();
      // If this is a repeat, there is no replacement frame to pop. Mark the top
      // frame as being dependent on this element.
      currFrame.dependentFrames.insert(dependentFrame);

      return CacheEntry(*this, element, *result);
    }

    // Cycle could not be broken.
    // A legal setup must ensure at least one element of each cycle can break
    // cycles. Under this setup, each element can be seen at most twice before
    // the cycle is broken. If we see an element more than twice, we know this
    // is an illegal setup.
    assert(it->second.size() <= 2 && "illegal 3rd repeat of input");
  }

  // Otherwise, either this is the first time we see this element, or this
  // element could not break this cycle.
  // Open a new replacement frame; the caller must resolve() the returned
  // entry to close it (via finalizeReplacement).
  it->second.push_back(replacementStack.size());
  replacementStack.emplace_back();

  return CacheEntry(*this, element);
}

// Record the final replacement for `element` and pop its frame off the
// replacement stack, classifying the result as context-free (standalone) or
// context-sensitive (dependent) based on the frame's recorded dependencies.
template <typename InT, typename OutT>
void CyclicReplacerCache<InT, OutT>::finalizeReplacement(InT element,
                                                         OutT result) {
  ReplacementFrame &currFrame = replacementStack.back();
  // With the conclusion of this replacement frame, the current element is no
  // longer a dependent element.
  currFrame.dependentFrames.erase(replacementStack.size() - 1);

  auto prevLayerIter = ++replacementStack.rbegin();
  if (prevLayerIter == replacementStack.rend()) {
    // If this is the last frame, there should be zero dependents.
    assert(currFrame.dependentFrames.empty() &&
           "internal error: top-level dependent replacement");
    // Cache standalone result.
    standaloneCache[element] = result;
  } else if (currFrame.dependentFrames.empty()) {
    // No dependencies on any live frame: the result is context-free.
    // Cache standalone result.
    standaloneCache[element] = result;
  } else {
    // Cache dependent result. The set is ordered highest-first, so begin()
    // is the closest (deepest) frame this result depends on.
    size_t highestDependentFrame = *currFrame.dependentFrames.begin();
    dependentCache[element] = {result, highestDependentFrame};

    // Otherwise, the previous frame inherits the same dependent frames.
    prevLayerIter->dependentFrames.insert(currFrame.dependentFrames.begin(),
                                          currFrame.dependentFrames.end());

    // Mark this current replacement as a depending replacement on the closest
    // dependent frame, so it is purged when that frame concludes.
    replacementStack[highestDependentFrame].dependingReplacements.insert(
        element);
  }

  // All depending replacements in the cache must be purged: the frame they
  // depended on (this one) is about to disappear.
  for (InT key : currFrame.dependingReplacements)
    dependentCache.erase(key);

  replacementStack.pop_back();
  // Unwind this element's stack-occurrence bookkeeping.
  auto it = cyclicElementFrame.find(element);
  it->second.pop_back();
  if (it->second.empty())
    cyclicElementFrame.erase(it);
}

//===----------------------------------------------------------------------===//
// CachedCyclicReplacer
//===----------------------------------------------------------------------===//

/// A helper class for cases where the input/output types of the replacer
/// function is identical to the types stored in the cache. This class wraps
/// the user-provided replacer function, and can be used in place of the user
/// function.
template <typename InT, typename OutT>
class CachedCyclicReplacer {
public:
  /// The user-provided replacement function; invoked only on cache misses.
  using ReplacerFn = std::function<OutT(InT)>;
  using CycleBreakerFn =
      typename CyclicReplacerCache<InT, OutT>::CycleBreakerFn;

  CachedCyclicReplacer() = delete;
  CachedCyclicReplacer(ReplacerFn replacer, CycleBreakerFn cycleBreaker)
      : replacer(std::move(replacer)), cache(std::move(cycleBreaker)) {}

  /// Replace `element`, consulting (and populating) the cycle-aware cache.
  OutT operator()(InT element) {
    auto entry = cache.lookupOrInit(element);

    // A resolved entry (prior result or broken-cycle replacement) bypasses
    // the user replacer entirely.
    if (const std::optional<OutT> &cached = entry.get())
      return *cached;

    // Cache miss: compute via the user replacer, then register the result so
    // the entry is resolved before it goes out of scope.
    OutT computed = replacer(element);
    entry.resolve(computed);
    return computed;
  }

private:
  /// Wrapped replacement logic.
  ReplacerFn replacer;
  /// Cycle-aware memoization of `replacer`'s results.
  CyclicReplacerCache<InT, OutT> cache;
};

} // namespace mlir

#endif // MLIR_SUPPORT_CYCLICREPLACERCACHE_H
1 change: 1 addition & 0 deletions mlir/unittests/Support/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Unit tests for the MLIR support library; keep source files sorted.
add_mlir_unittest(MLIRSupportTests
  CyclicReplacerCacheTest.cpp
  IndentedOstreamTest.cpp
  StorageUniquerTest.cpp
)
Expand Down
478 changes: 478 additions & 0 deletions mlir/unittests/Support/CyclicReplacerCacheTest.cpp

Large diffs are not rendered by default.