147 changes: 147 additions & 0 deletions llvm/lib/Target/AVR/AVRShiftExpand.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
//===- AVRShiftExpand.cpp - Shift Expansion Pass --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
/// like avr-gcc. This must be done in IR because otherwise the type legalizer
/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
//
//===----------------------------------------------------------------------===//

#include "AVR.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"

using namespace llvm;

namespace {

/// Function pass that rewrites i32 shl/lshr/ashr instructions whose shift
/// amount is not a ConstantInt into an explicit loop that shifts by one bit
/// per iteration.
class AVRShiftExpand : public FunctionPass {
  /// Replace the single shift instruction \p BI with the equivalent loop and
  /// erase \p BI from its function.
  void expand(BinaryOperator *BI);

public:
  /// Pass identification object handed to the FunctionPass base class.
  static char ID;

  AVRShiftExpand() : FunctionPass(ID) {}

  /// Human-readable pass name.
  StringRef getPassName() const override { return "AVR Shift Expansion"; }

  /// Expand every eligible shift in \p F; returns true if any was expanded.
  bool runOnFunction(Function &F) override;
};

} // end of anonymous namespace

// Storage for the pass identification member declared in the class.
// NOTE(review): by LLVM convention the address of ID, not its value, is what
// identifies the pass — confirm against the pass infrastructure docs.
char AVRShiftExpand::ID = 0;

// Register the pass under the argument string "avr-shift-expand" with the
// description "AVR Shift Expansion".
// NOTE(review): the two trailing `false` arguments are presumably the macro's
// "CFG only" and "is analysis" flags — confirm against llvm/PassSupport.h.
INITIALIZE_PASS(AVRShiftExpand, "avr-shift-expand", "AVR Shift Expansion",
false, false)

// Factory: returns a newly heap-allocated AVRShiftExpand pass instance.
Pass *llvm::createAVRShiftExpandPass() { return new AVRShiftExpand(); }

/// Collect every shift in \p F that needs expanding and expand each of them.
///
/// A shift is expanded only when all of the following hold:
///  - it is a shift instruction (shl, lshr, ashr),
///  - its result type is exactly i32,
///  - its shift amount is not a ConstantInt (known amounts are currently
///    better expanded inline).
///
/// \returns true if at least one shift instruction was expanded.
bool AVRShiftExpand::runOnFunction(Function &F) {
  Type *I32 = Type::getInt32Ty(F.getContext());

  // Gather the candidates first: expand() erases the instruction it is given
  // and splits its basic block, which must not happen while the instruction
  // list is still being walked.
  SmallVector<BinaryOperator *, 1> Worklist;
  for (Instruction &Inst : instructions(F)) {
    // The checks short-circuit in this order, so operand 1 is only inspected
    // on instructions already known to be shifts.
    const bool NeedsExpansion = Inst.isShift() && Inst.getType() == I32 &&
                                !isa<ConstantInt>(Inst.getOperand(1));
    if (NeedsExpansion)
      Worklist.push_back(cast<BinaryOperator>(&Inst));
  }

  for (BinaryOperator *Shift : Worklist)
    expand(Shift);

  // The function changed exactly when something was queued for expansion.
  return !Worklist.empty();
}

/// Rewrite the i32 shift \p BI into a loop that shifts one bit at a time.
///
/// The block containing \p BI is split in front of it. The resulting control
/// flow is:
///
///   head:        count = trunc amount to i8
///                br (count == 0), shift.done, shift.loop
///   shift.loop:  c   = phi i8  [count, head], [c.next, shift.loop]
///                v   = phi i32 [value, head], [v.next, shift.loop]
///                c.next = sub c, 1
///                v.next = <shl|lshr|ashr> v, 1
///                br (c.next == 0), shift.done, shift.loop
///   shift.done:  result = phi i32 [value, head], [v.next, shift.loop]
///
/// All uses of \p BI are redirected to the result PHI and \p BI is erased.
void AVRShiftExpand::expand(BinaryOperator *BI) {
  auto &Context = BI->getContext();
  IRBuilder<> Builder(BI);
  Type *I32 = Type::getInt32Ty(Context);
  Type *I8 = Type::getInt8Ty(Context);
  Value *ZeroI8 = ConstantInt::get(I8, 0);

  // Operands of the shift being replaced: the value and the shift amount.
  Value *Input = BI->getOperand(0);
  Value *Amount = BI->getOperand(1);

  // Cut the block in front of the shift; the loop block is placed between the
  // head block and the continuation.
  BasicBlock *HeadBB = BI->getParent();
  BasicBlock *DoneBB = HeadBB->splitBasicBlock(BI, "shift.done");
  BasicBlock *BodyBB =
      BasicBlock::Create(Context, "shift.loop", HeadBB->getParent(), DoneBB);

  // splitBasicBlock ended the head block with an unconditional branch. Emit
  // the replacement code in front of it and then drop it.
  Instruction *SplitBranch = &HeadBB->back();
  Builder.SetInsertPoint(SplitBranch);

  // The counter is narrowed to i8 so that it fits in a single AVR register.
  Value *Count = Builder.CreateTrunc(Amount, I8);

  // A shift amount of zero bypasses the loop entirely.
  Builder.CreateCondBr(Builder.CreateICmpEQ(Count, ZeroI8), DoneBB, BodyBB);
  SplitBranch->eraseFromParent();

  // Loop header PHIs: the remaining iteration count and the running value.
  Builder.SetInsertPoint(BodyBB);
  PHINode *CountPHI = Builder.CreatePHI(I8, 2);
  CountPHI->addIncoming(Count, HeadBB);
  PHINode *AccPHI = Builder.CreatePHI(I32, 2);
  AccPHI->addIncoming(Input, HeadBB);

  // One bit is shifted per iteration, so the counter goes down by one.
  Value *Remaining = Builder.CreateSub(CountPHI, ConstantInt::get(I8, 1));
  CountPHI->addIncoming(Remaining, BodyBB);

  // Same kind of shift as the original, but by the constant 1, which can be
  // emitted inline without a library call.
  Value *OneI32 = ConstantInt::get(I32, 1);
  Value *Stepped = nullptr;
  switch (BI->getOpcode()) {
  case Instruction::Shl:
    Stepped = Builder.CreateShl(AccPHI, OneI32);
    break;
  case Instruction::LShr:
    Stepped = Builder.CreateLShr(AccPHI, OneI32);
    break;
  case Instruction::AShr:
    Stepped = Builder.CreateAShr(AccPHI, OneI32);
    break;
  default:
    llvm_unreachable("asked to expand an instruction that is not a shift");
  }
  AccPHI->addIncoming(Stepped, BodyBB);

  // Leave the loop once the counter reaches zero, otherwise go around again.
  Builder.CreateCondBr(Builder.CreateICmpEQ(Remaining, ZeroI8), DoneBB, BodyBB);

  // Merge the two ways of reaching the continuation: the untouched input
  // (zero shift amount) or the value produced by the loop.
  Builder.SetInsertPoint(BI);
  PHINode *Merged = Builder.CreatePHI(I32, 2);
  Merged->addIncoming(Input, HeadBB);
  Merged->addIncoming(Stepped, BodyBB);

  // The merged value takes over from the original shift.
  BI->replaceAllUsesWith(Merged);
  BI->eraseFromParent();
}
11 changes: 11 additions & 0 deletions llvm/lib/Target/AVR/AVRTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class AVRPassConfig : public TargetPassConfig {
return getTM<AVRTargetMachine>();
}

void addIRPasses() override;
bool addInstSelector() override;
void addPreSched2() override;
void addPreEmitPass() override;
Expand All @@ -76,13 +77,23 @@ TargetPassConfig *AVRTargetMachine::createPassConfig(PassManagerBase &PM) {
return new AVRPassConfig(*this, PM);
}

/// Add the AVR shift expansion pass, then defer to the base class for the
/// remaining IR-level passes.
void AVRPassConfig::addIRPasses() {
// Expand instructions like
// %result = shl i32 %n, %amount
// to a loop so that library calls are avoided.
// The pass is added before the call into the base class below, so it is
// queued ahead of whatever TargetPassConfig::addIRPasses() adds.
addPass(createAVRShiftExpandPass());

TargetPassConfig::addIRPasses();
}

/// C-linkage entry point that registers the AVR target machine and
/// initializes the AVR passes listed below with the global pass registry.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
  // Constructing this object performs the target machine registration.
  RegisterTargetMachine<AVRTargetMachine> Registration(getTheAVRTarget());

  // Initialize each AVR pass with the process-wide pass registry.
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeAVRExpandPseudoPass(Registry);
  initializeAVRRelaxMemPass(Registry);
  initializeAVRShiftExpandPass(Registry);
}

const AVRSubtarget *AVRTargetMachine::getSubtargetImpl() const {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AVR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ add_llvm_target(AVRCodeGen
AVRMCInstLower.cpp
AVRRelaxMemOperations.cpp
AVRRegisterInfo.cpp
AVRShiftExpand.cpp
AVRSubtarget.cpp
AVRTargetMachine.cpp
AVRTargetObjectFile.cpp
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AVR/call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ define i16 @calli16_reg() {

define i16 @calli16_stack() {
; CHECK-LABEL: calli16_stack:
; CHECK: ldi [[REG1:r[0-9]+]], 9
; CHECK: ldi [[REG2:r[0-9]+]], 2
; CHECK: std Z+1, [[REG1]]
; CHECK: std Z+2, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 10
; CHECK: ldi [[REG2:r[0-9]+]], 2
; CHECK: std Z+3, [[REG1]]
; CHECK: std Z+4, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 9
; CHECK: ldi [[REG2:r[0-9]+]], 2
; CHECK: std Z+1, [[REG1]]
; CHECK: std Z+2, [[REG2]]
; CHECK: call foo16_2
%result1 = call i16 @foo16_2(i16 512, i16 513, i16 514, i16 515, i16 516, i16 517, i16 518, i16 519, i16 520, i16 521, i16 522)
ret i16 %result1
Expand All @@ -82,14 +82,14 @@ define i32 @calli32_reg() {

define i32 @calli32_stack() {
; CHECK-LABEL: calli32_stack:
; CHECK: ldi [[REG1:r[0-9]+]], 64
; CHECK: ldi [[REG2:r[0-9]+]], 66
; CHECK: std Z+1, [[REG1]]
; CHECK: std Z+2, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 15
; CHECK: ldi [[REG2:r[0-9]+]], 2
; CHECK: std Z+3, [[REG1]]
; CHECK: std Z+4, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 64
; CHECK: ldi [[REG2:r[0-9]+]], 66
; CHECK: std Z+1, [[REG1]]
; CHECK: std Z+2, [[REG2]]
; CHECK: call foo32_2
%result1 = call i32 @foo32_2(i32 1, i32 2, i32 3, i32 4, i32 34554432)
ret i32 %result1
Expand All @@ -113,14 +113,14 @@ define i64 @calli64_reg() {
define i64 @calli64_stack() {
; CHECK-LABEL: calli64_stack:

; CHECK: ldi [[REG1:r[0-9]+]], 76
; CHECK: ldi [[REG2:r[0-9]+]], 73
; CHECK: std Z+5, [[REG1]]
; CHECK: std Z+6, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 31
; CHECK: ldi [[REG2:r[0-9]+]], 242
; CHECK: std Z+7, [[REG1]]
; CHECK: std Z+8, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 76
; CHECK: ldi [[REG2:r[0-9]+]], 73
; CHECK: std Z+5, [[REG1]]
; CHECK: std Z+6, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 155
; CHECK: ldi [[REG2:r[0-9]+]], 88
; CHECK: std Z+3, [[REG1]]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AVR/dynalloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ define void @dynalloca2(i16 %x) {
; Store values on the stack
; CHECK: ldi r16, 0
; CHECK: ldi r17, 0
; CHECK: std Z+5, r16
; CHECK: std Z+6, r17
; CHECK: std Z+7, r16
; CHECK: std Z+8, r17
; CHECK: std Z+5, r16
; CHECK: std Z+6, r17
; CHECK: std Z+3, r16
; CHECK: std Z+4, r17
; CHECK: std Z+1, r16
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/CodeGen/AVR/intrinsics/named-reg-alloc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
; RUN: not --crash llc -O0 < %s -march=avr 2>&1 | FileCheck %s

; Reads the register named by !0 ("r28") through llvm.read_register.i8.
; The RUN line runs llc under `not --crash`, so compilation is expected to
; crash and the diagnostic below is matched against its output.
define void @foo() {
entry:
; CHECK: Invalid register name "r28".
%val1 = call i8 @llvm.read_register.i8(metadata !0)
ret void
}

declare i8 @llvm.read_register.i8(metadata)

!0 = !{!"r28"}
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/AVR/intrinsics/named-reg-special.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; RUN: llc -O0 < %s -march=avr | FileCheck %s

; CHECK-LABEL: read_sp:
; CHECK: in r24, 61
; CHECK: in r25, 62
; Reads the register named by !0 ("sp") as an i16 and returns it.
; The expected output above is two `in` instructions (operands 61 and 62)
; filling r24 and r25.
define i16 @read_sp() {
entry:
%sp = call i16 @llvm.read_register.i16(metadata !0)
ret i16 %sp
}

; CHECK-LABEL: read_r0:
; CHECK: mov r24, r0
; Reads the register named by !1 ("r0") as an i8 and returns it.
; The expected output above is a single copy from r0 into r24.
define i8 @read_r0() {
entry:
%r0 = call i8 @llvm.read_register.i8(metadata !1)
ret i8 %r0
}

; CHECK-LABEL: read_r1:
; CHECK: mov r24, r1
; Reads the register named by !2 ("r1") as an i8 and returns it.
; The expected output above is a single copy from r1 into r24.
define i8 @read_r1() {
entry:
%r1 = call i8 @llvm.read_register.i8(metadata !2)
ret i8 %r1
}

; CHECK-LABEL: read_r1r0:
; CHECK: mov r24, r0
; CHECK: mov r25, r1
; Reads the register named by !1 ("r0") as an i16 and returns it.
; The expected output above copies r0 into r24 and r1 into r25, i.e. the
; 16-bit read covers both r0 and r1.
define i16 @read_r1r0() {
entry:
%r1r0 = call i16 @llvm.read_register.i16(metadata !1)
ret i16 %r1r0
}

declare i16 @llvm.read_register.i16(metadata)
declare i8 @llvm.read_register.i8(metadata)

!0 = !{!"sp"}
!1 = !{!"r0"}
!2 = !{!"r1"}
17 changes: 0 additions & 17 deletions llvm/test/CodeGen/AVR/intrinsics/read_register.ll

This file was deleted.

7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AVR/rot.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,9 @@ define i8 @ror8(i8 %val, i8 %amt) {
; CHECK-NEXT: brmi .LBB1_2

; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: lsr r24
; CHECK-NEXT: ldi r0, 0
; CHECK-NEXT: ror r0
; CHECK-NEXT: or r24, r0
; CHECK-NEXT: bst r24, 0
; CHECK-NEXT: ror r24
; CHECK-NEXT: bld r24, 7
; CHECK-NEXT: dec r22
; CHECK-NEXT: brpl .LBB1_1

Expand Down
89 changes: 89 additions & 0 deletions llvm/test/CodeGen/AVR/shift-expand.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -avr-shift-expand -S %s -o - | FileCheck %s

; The avr-shift-expand pass expands large shifts with a non-constant shift
; amount to a loop. These loops avoid generating a (non-existing) builtin such
; as __ashlsi3.

target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
target triple = "avr"

; i32 shl by a non-constant amount. The expected output truncates the amount
; to i8, skips the loop when it is zero, and otherwise shifts left by one bit
; per iteration of shift.loop until the counter reaches zero.
define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
; CHECK-LABEL: @shl(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
; CHECK: shift.loop:
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
; CHECK-NEXT: [[TMP6]] = shl i32 [[TMP4]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
; CHECK: shift.done:
; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: ret i32 [[TMP8]]
;
%result = shl i32 %value, %amount
ret i32 %result
}

; i32 lshr by a non-constant amount. The expected output has the same loop
; structure as the shl case, with a one-bit lshr in the loop body.
define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
; CHECK-LABEL: @lshr(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
; CHECK: shift.loop:
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
; CHECK-NEXT: [[TMP6]] = lshr i32 [[TMP4]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
; CHECK: shift.done:
; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: ret i32 [[TMP8]]
;
%result = lshr i32 %value, %amount
ret i32 %result
}

; i32 ashr by a non-constant amount, written with unnamed arguments and an
; unnamed result. The expected output has the same loop structure as the shl
; case, with a one-bit ashr in the loop body.
define i32 @ashr(i32 %0, i32 %1) addrspace(1) {
; CHECK-LABEL: @ashr(
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
; CHECK: shift.loop:
; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: [[TMP7]] = sub i8 [[TMP5]], 1
; CHECK-NEXT: [[TMP8]] = ashr i32 [[TMP6]], 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
; CHECK: shift.done:
; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ]
; CHECK-NEXT: ret i32 [[TMP10]]
;
%3 = ashr i32 %0, %1
ret i32 %3
}

; This function is not modified because it is not an i32.
; i40 shl by a non-constant amount: the expected output is the original shift,
; unchanged.
define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
; CHECK-LABEL: @shl40(
; CHECK-NEXT: [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]]
; CHECK-NEXT: ret i40 [[RESULT]]
;
%result = shl i40 %value, %amount
ret i40 %result
}

; This function isn't modified either, although perhaps it should be.
; i24 shl by a non-constant amount: the expected output is the original shift,
; unchanged.
define i24 @shl24(i24 %value, i24 %amount) addrspace(1) {
; CHECK-LABEL: @shl24(
; CHECK-NEXT: [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]]
; CHECK-NEXT: ret i24 [[RESULT]]
;
%result = shl i24 %value, %amount
ret i24 %result
}
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AVR/varargs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ define i16 @varargs2(i8* nocapture %x, ...) {
declare void @var1223(i16, ...)
define void @varargcall() {
; CHECK-LABEL: varargcall:
; CHECK: ldi [[REG1:r[0-9]+]], 189
; CHECK: ldi [[REG2:r[0-9]+]], 205
; CHECK: std Z+3, [[REG1]]
; CHECK: std Z+4, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 191
; CHECK: ldi [[REG2:r[0-9]+]], 223
; CHECK: std Z+5, [[REG1]]
; CHECK: std Z+6, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 189
; CHECK: ldi [[REG2:r[0-9]+]], 205
; CHECK: std Z+3, [[REG1]]
; CHECK: std Z+4, [[REG2]]
; CHECK: ldi [[REG1:r[0-9]+]], 205
; CHECK: ldi [[REG2:r[0-9]+]], 171
; CHECK: std Z+1, [[REG1]]
Expand Down