-
Notifications
You must be signed in to change notification settings - Fork 11.6k
/
AArch64InstructionSelector.cpp
5930 lines (5255 loc) · 211 KB
/
AArch64InstructionSelector.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "aarch64-isel"
using namespace llvm;
using namespace MIPatternMatch;
namespace {
#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
class AArch64InstructionSelector : public InstructionSelector {
public:
AArch64InstructionSelector(const AArch64TargetMachine &TM,
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
bool select(MachineInstr &I) override;
static const char *getName() { return DEBUG_TYPE; }
void setupMF(MachineFunction &MF, GISelKnownBits &KB,
CodeGenCoverage &CoverageInfo) override {
InstructionSelector::setupMF(MF, KB, CoverageInfo);
// hasFnAttribute() is expensive to call on every BRCOND selection, so
// cache it here for each run of the selector.
ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
MFReturnAddr = Register();
processPHIs(MF);
}
private:
/// tblgen-erated 'select' implementation, used as the initial selector for
/// the patterns that don't require complex C++.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
// A lowering phase that runs before any selection attempts.
// Returns true if the instruction was modified.
bool preISelLower(MachineInstr &I);
// An early selection function that runs before the selectImpl() call.
bool earlySelect(MachineInstr &I) const;
// Do some preprocessing of G_PHIs before we begin selection.
void processPHIs(MachineFunction &MF);
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
bool contractCrossBankCopyIntoStore(MachineInstr &I,
MachineRegisterInfo &MRI);
bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
int64_t CmpConstant,
const CmpInst::Predicate &Pred,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
// Helper to generate an equivalent of scalar_to_vector into a new register,
// returned via 'Dst'.
MachineInstr *emitScalarToVector(unsigned EltSize,
const TargetRegisterClass *DstRC,
Register Scalar,
MachineIRBuilder &MIRBuilder) const;
/// Emit a lane insert into \p DstReg, or a new vector register if None is
/// provided.
///
/// The lane inserted into is defined by \p LaneIdx. The vector source
/// register is given by \p SrcReg. The register containing the element is
/// given by \p EltReg.
MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
Register EltReg, unsigned LaneIdx,
const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const;
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI) const;
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool tryOptShuffleDupLane(MachineInstr &I, LLT DstTy, LLT SrcTy,
ArrayRef<int> Mask, MachineRegisterInfo &MRI) const;
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectSplitVectorUnmerge(MachineInstr &I,
MachineRegisterInfo &MRI) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI) const;
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
MachineIRBuilder &MIRBuilder) const;
// Emit a vector concat operation.
MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
Register Op2,
MachineIRBuilder &MIRBuilder) const;
// Emit an integer compare between LHS and RHS, which checks for Predicate.
//
// This returns the produced compare instruction, and the predicate which
// was ultimately used in the compare. The predicate may differ from what
// is passed in \p Predicate due to optimization.
std::pair<MachineInstr *, CmpInst::Predicate>
emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitInstr(unsigned Opcode,
std::initializer_list<llvm::DstOp> DstOps,
std::initializer_list<llvm::SrcOp> SrcOps,
MachineIRBuilder &MIRBuilder,
const ComplexRendererFns &RenderFns = None) const;
/// Helper function to emit a binary operation such as an ADD, ADDS, etc.
///
/// This is intended for instructions with the following opcode variants:
///
/// - Xri, Wri (arithmetic immediate form)
/// - Xrs, Wrs (shifted register form)
/// - Xrr, Wrr (register form)
///
/// For example, for ADD, we have ADDXri, ADDWri, ADDXrs, etc.
///
/// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
/// in a specific order.
///
/// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
///
/// \code
/// const std::array<std::array<unsigned, 2>, 3> Table {
/// {{AArch64::ADDXri, AArch64::ADDWri},
/// {AArch64::ADDXrs, AArch64::ADDWrs},
/// {AArch64::ADDXrr, AArch64::ADDWrr}}};
/// \endcode
///
/// Each row in the table corresponds to a different addressing mode. Each
/// column corresponds to a different register size.
///
/// \attention Rows must be structured as follows:
/// - Row 0: The ri opcode variants
/// - Row 1: The rs opcode variants
/// - Row 2: The rr opcode variants
///
/// \attention Columns must be structured as follows:
/// - Column 0: The 64-bit opcode variants
/// - Column 1: The 32-bit opcode variants
///
/// \p Dst is the destination register of the binop to emit.
/// \p LHS is the left-hand operand of the binop to emit.
/// \p RHS is the right-hand operand of the binop to emit.
MachineInstr *emitBinOp(
const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitTST(const Register &LHS, const Register &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
/// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
/// materialized using a FMOV instruction, then update MI and return it.
/// Otherwise, do nothing and return a nullptr.
MachineInstr *emitFMovForFConstant(MachineInstr &MI,
MachineRegisterInfo &MRI) const;
/// Emit a CSet for a compare.
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder) const;
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
/// \p IsNegative is true if the test should be "not zero".
/// This will also optimize the test bit instruction when possible.
MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
unsigned Size) const;
ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 1);
}
ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 2);
}
ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 4);
}
ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 8);
}
ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 16);
}
/// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
/// from complex pattern matchers like selectAddrModeIndexed().
ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
MachineRegisterInfo &MRI) const;
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
unsigned Size) const;
template <int Width>
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
return selectAddrModeIndexed(Root, Width / 8);
}
bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
const MachineRegisterInfo &MRI) const;
ComplexRendererFns
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
unsigned SizeInBytes) const;
/// Returns a \p ComplexRendererFns which contains a base, offset, and whether
/// or not a shift + extend should be folded into an addressing mode. Returns
/// None when this is not profitable or possible.
ComplexRendererFns
selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
MachineOperand &Offset, unsigned SizeInBytes,
bool WantsExt) const;
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const;
template <int Width>
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
return selectAddrModeXRO(Root, Width / 8);
}
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
unsigned SizeInBytes) const;
template <int Width>
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
return selectAddrModeWRO(Root, Width / 8);
}
ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
return selectShiftedRegister(Root);
}
ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
// TODO: selectShiftedRegister should allow for rotates on logical shifts.
// For now, make them the same. The only difference between the two is that
// logical shifts are allowed to fold in rotates. Otherwise, these are
// functionally the same.
return selectShiftedRegister(Root);
}
/// Given an extend instruction, determine the correct shift-extend type for
/// that instruction.
///
/// If the instruction is going to be used in a load or store, pass
/// \p IsLoadStore = true.
AArch64_AM::ShiftExtendType
getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
bool IsLoadStore = false) const;
/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
/// subregister copy if necessary. Return either ExtReg, or the result of the
/// new copy.
Register narrowExtendRegIfNeeded(Register ExtReg,
MachineIRBuilder &MIB) const;
Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
MachineIRBuilder &MIB) const;
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
int OpIdx = -1) const;
void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
int OpIdx = -1) const;
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
void materializeLargeCMVal(MachineInstr &I, const Value *V,
unsigned OpFlags) const;
// Optimization methods.
bool tryOptSelect(MachineInstr &MI) const;
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
MachineOperand &RHS,
CmpInst::Predicate &Predicate,
MachineIRBuilder &MIB) const;
MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIB) const;
/// Return true if \p MI is a load or store of \p NumBytes bytes.
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
/// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
/// register zeroed out. In other words, the result of MI has been explicitly
/// zero extended.
bool isDef32(const MachineInstr &MI) const;
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
const AArch64RegisterInfo &TRI;
const AArch64RegisterBankInfo &RBI;
bool ProduceNonFlagSettingCondBr = false;
// Some cached values used during selection.
// We use LR as a live-in register, and we keep track of it here as it can be
// clobbered by calls.
Register MFReturnAddr;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};
} // end anonymous namespace
#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI)
: InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
const RegisterBankInfo &RBI,
bool GetAllRegSet = false) {
if (RB.getID() == AArch64::GPRRegBankID) {
if (Ty.getSizeInBits() <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
if (Ty.getSizeInBits() == 64)
return GetAllRegSet ? &AArch64::GPR64allRegClass
: &AArch64::GPR64RegClass;
return nullptr;
}
if (RB.getID() == AArch64::FPRRegBankID) {
if (Ty.getSizeInBits() <= 16)
return &AArch64::FPR16RegClass;
if (Ty.getSizeInBits() == 32)
return &AArch64::FPR32RegClass;
if (Ty.getSizeInBits() == 64)
return &AArch64::FPR64RegClass;
if (Ty.getSizeInBits() == 128)
return &AArch64::FPR128RegClass;
return nullptr;
}
return nullptr;
}
/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
bool GetAllRegSet = false) {
unsigned RegBankID = RB.getID();
if (RegBankID == AArch64::GPRRegBankID) {
if (SizeInBits <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
if (SizeInBits == 64)
return GetAllRegSet ? &AArch64::GPR64allRegClass
: &AArch64::GPR64RegClass;
}
if (RegBankID == AArch64::FPRRegBankID) {
switch (SizeInBits) {
default:
return nullptr;
case 8:
return &AArch64::FPR8RegClass;
case 16:
return &AArch64::FPR16RegClass;
case 32:
return &AArch64::FPR32RegClass;
case 64:
return &AArch64::FPR64RegClass;
case 128:
return &AArch64::FPR128RegClass;
}
}
return nullptr;
}
/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
const TargetRegisterInfo &TRI, unsigned &SubReg) {
switch (TRI.getRegSizeInBits(*RC)) {
case 8:
SubReg = AArch64::bsub;
break;
case 16:
SubReg = AArch64::hsub;
break;
case 32:
if (RC != &AArch64::FPR32RegClass)
SubReg = AArch64::sub_32;
else
SubReg = AArch64::ssub;
break;
case 64:
SubReg = AArch64::dsub;
break;
default:
LLVM_DEBUG(
dbgs() << "Couldn't find appropriate subregister for register class.");
return false;
}
return true;
}
/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
switch (RB.getID()) {
case AArch64::GPRRegBankID:
return 32;
case AArch64::FPRRegBankID:
return 8;
default:
llvm_unreachable("Tried to get minimum size for unknown register bank.");
}
}
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
auto &MI = *Root.getParent();
auto &MBB = *MI.getParent();
auto &MF = *MBB.getParent();
auto &MRI = MF.getRegInfo();
uint64_t Immed;
if (Root.isImm())
Immed = Root.getImm();
else if (Root.isCImm())
Immed = Root.getCImm()->getZExtValue();
else if (Root.isReg()) {
auto ValAndVReg =
getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
Immed = ValAndVReg->Value;
} else
return None;
return Immed;
}
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - all operands are not in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
const AArch64RegisterBankInfo &RBI,
const MachineRegisterInfo &MRI,
const AArch64RegisterInfo &TRI) {
LLT Ty = MRI.getType(I.getOperand(0).getReg());
if (!Ty.isValid()) {
LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
return true;
}
const RegisterBank *PrevOpBank = nullptr;
for (auto &MO : I.operands()) {
// FIXME: Support non-register operands.
if (!MO.isReg()) {
LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
return true;
}
// FIXME: Can generic operations have physical registers operands? If
// so, this will need to be taught about that, and we'll need to get the
// bank out of the minimal class for the register.
// Either way, this needs to be documented (and possibly verified).
if (!Register::isVirtualRegister(MO.getReg())) {
LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
return true;
}
const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
if (!OpBank) {
LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
return true;
}
if (PrevOpBank && OpBank != PrevOpBank) {
LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
return true;
}
PrevOpBank = OpBank;
}
return false;
}
/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
unsigned OpSize) {
switch (RegBankID) {
case AArch64::GPRRegBankID:
if (OpSize == 32) {
switch (GenericOpc) {
case TargetOpcode::G_SHL:
return AArch64::LSLVWr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVWr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVWr;
default:
return GenericOpc;
}
} else if (OpSize == 64) {
switch (GenericOpc) {
case TargetOpcode::G_PTR_ADD:
return AArch64::ADDXrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVXr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVXr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVXr;
default:
return GenericOpc;
}
}
break;
case AArch64::FPRRegBankID:
switch (OpSize) {
case 32:
switch (GenericOpc) {
case TargetOpcode::G_FADD:
return AArch64::FADDSrr;
case TargetOpcode::G_FSUB:
return AArch64::FSUBSrr;
case TargetOpcode::G_FMUL:
return AArch64::FMULSrr;
case TargetOpcode::G_FDIV:
return AArch64::FDIVSrr;
default:
return GenericOpc;
}
case 64:
switch (GenericOpc) {
case TargetOpcode::G_FADD:
return AArch64::FADDDrr;
case TargetOpcode::G_FSUB:
return AArch64::FSUBDrr;
case TargetOpcode::G_FMUL:
return AArch64::FMULDrr;
case TargetOpcode::G_FDIV:
return AArch64::FDIVDrr;
case TargetOpcode::G_OR:
return AArch64::ORRv8i8;
default:
return GenericOpc;
}
}
break;
}
return GenericOpc;
}
/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
unsigned OpSize) {
const bool isStore = GenericOpc == TargetOpcode::G_STORE;
switch (RegBankID) {
case AArch64::GPRRegBankID:
switch (OpSize) {
case 8:
return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
case 16:
return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
case 32:
return isStore ? AArch64::STRWui : AArch64::LDRWui;
case 64:
return isStore ? AArch64::STRXui : AArch64::LDRXui;
}
break;
case AArch64::FPRRegBankID:
switch (OpSize) {
case 8:
return isStore ? AArch64::STRBui : AArch64::LDRBui;
case 16:
return isStore ? AArch64::STRHui : AArch64::LDRHui;
case 32:
return isStore ? AArch64::STRSui : AArch64::LDRSui;
case 64:
return isStore ? AArch64::STRDui : AArch64::LDRDui;
}
break;
}
return GenericOpc;
}
#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
// Make sure the size of the source and dest line up.
assert(
(DstSize == SrcSize ||
// Copies are a mean to setup initial types, the number of
// bits may not exactly match.
(Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
// Copies are a mean to copy bits around, as long as we are
// on the same register class, that's fine. Otherwise, that
// means we need some SUBREG_TO_REG or AND & co.
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!");
// Check the size of the destination.
assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
"GPRs cannot get more than 64-bit width values");
return true;
}
#endif
/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
const RegisterBankInfo &RBI, Register SrcReg,
const TargetRegisterClass *To, unsigned SubReg) {
assert(SrcReg.isValid() && "Expected a valid source register?");
assert(To && "Destination register class cannot be null");
assert(SubReg && "Expected a valid subregister");
MachineIRBuilder MIB(I);
auto SubRegCopy =
MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(SubRegCopy.getReg(0));
// It's possible that the destination register won't be constrained. Make
// sure that happens.
if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
return true;
}
/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
// Special casing for cross-bank copies of s1s. We can technically represent
// a 1-bit value with any size of register. The minimum size for a GPR is 32
// bits. So, we need to put the FPR on 32 bits as well.
//
// FIXME: I'm not sure if this case holds true outside of copies. If it does,
// then we can pull it into the helpers that get the appropriate class for a
// register bank. Or make a new helper that carries along some constraint
// information.
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
SrcSize = DstSize = 32;
return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
getMinClassForRegBank(DstRegBank, DstSize, true)};
}
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
// Find the correct register classes for the source and destination registers.
const TargetRegisterClass *SrcRC;
const TargetRegisterClass *DstRC;
std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
if (!DstRC) {
LLVM_DEBUG(dbgs() << "Unexpected dest size "
<< RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
return false;
}
// A couple helpers below, for making sure that the copy we produce is valid.
// Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
// to verify that the src and dst are the same size, since that's handled by
// the SUBREG_TO_REG.
bool KnownValid = false;
// Returns true, or asserts if something we don't expect happens. Instead of
// returning true, we return isValidCopy() to ensure that we verify the
// result.
auto CheckCopy = [&]() {
// If we have a bitcast or something, we can't have physical registers.
assert((I.isCopy() ||
(!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
!Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
"No phys reg on generic operator!");
bool ValidCopy = true;
#ifndef NDEBUG
ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
assert(ValidCopy && "Invalid copy.");
#endif
return ValidCopy;
};
// Is this a copy? If so, then we may need to insert a subregister copy.
if (I.isCopy()) {
// Yes. Check if there's anything to fix up.
if (!SrcRC) {
LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
return false;
}
unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
unsigned SubReg;
// If the source bank doesn't support a subregister copy small enough,
// then we first need to copy to the destination bank.
if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
const TargetRegisterClass *DstTempRC =
getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
getSubRegForClass(DstRC, TRI, SubReg);
MachineIRBuilder MIB(I);
auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
} else if (SrcSize > DstSize) {
// If the source register is bigger than the destination we need to
// perform a subregister copy.
const TargetRegisterClass *SubRegRC =
getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
getSubRegForClass(SubRegRC, TRI, SubReg);
copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
} else if (DstSize > SrcSize) {
// If the destination register is bigger than the source we need to do
// a promotion using SUBREG_TO_REG.
const TargetRegisterClass *PromotionRC =
getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
getSubRegForClass(SrcRC, TRI, SubReg);
Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
.addImm(0)
.addUse(SrcReg)
.addImm(SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(PromoteReg);
// Promise that the copy is implicitly validated by the SUBREG_TO_REG.
KnownValid = true;
}
// If the destination is a physical register, then there's nothing to
// change, so we're done.
if (Register::isPhysicalRegister(DstReg))
return CheckCopy();
}
// No need to constrain SrcReg. It will get constrained when we hit another
// of its use or its defs. Copies do not have constraints.
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
<< " operand\n");
return false;
}
I.setDesc(TII.get(AArch64::COPY));
return CheckCopy();
}
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
if (!DstTy.isScalar() || !SrcTy.isScalar())
return GenericOpc;
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
switch (DstSize) {
case 32:
switch (SrcSize) {
case 32:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUWSri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUWSri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUWSr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUWSr;
default:
return GenericOpc;
}
case 64:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUXSri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUXSri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUWDr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUWDr;
default:
return GenericOpc;
}
default:
return GenericOpc;
}
case 64:
switch (SrcSize) {
case 32:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUWDri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUWDri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUXSr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUXSr;
default:
return GenericOpc;
}
case 64:
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
return AArch64::SCVTFUXDri;
case TargetOpcode::G_UITOFP:
return AArch64::UCVTFUXDri;
case TargetOpcode::G_FPTOSI:
return AArch64::FCVTZSUXDr;
case TargetOpcode::G_FPTOUI:
return AArch64::FCVTZUUXDr;
default:
return GenericOpc;
}
default:
return GenericOpc;
}
default:
return GenericOpc;
};
return GenericOpc;
}
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
const RegisterBankInfo &RBI) {
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
AArch64::GPRRegBankID);
LLT Ty = MRI.getType(I.getOperand(0).getReg());
if (Ty == LLT::scalar(32))
return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
return 0;
}