Skip to content

Commit

Permalink
[Hexagon] Add patterns to load i1
Browse files Browse the repository at this point in the history
  • Loading branch information
Krzysztof Parzyszek committed Jun 28, 2021
1 parent 5d6240b commit 9c5ed8d
Show file tree
Hide file tree
Showing 3 changed files with 451 additions and 16 deletions.
62 changes: 46 additions & 16 deletions llvm/lib/Target/Hexagon/HexagonPatterns.td
Expand Up @@ -1948,6 +1948,9 @@ def: Pat<(HexagonAtPcrel I32:$addr),
// --(12) Load -----------------------------------------------------------
//

// Output fragments used as the value transform in the sextloadi1 patterns.
// L1toI32 computes 0 - Rs (A2_subri 0, Rs). For a loaded value that is
// either 0 or 1 this gives 0 or -1, i.e. the lowest bit replicated through
// the 32-bit result.
def L1toI32: OutPatFrag<(ops node:$Rs), (A2_subri 0, (i32 $Rs))>;
// L1toI64 is the i64 counterpart: it applies ToSext64 to the result of
// L1toI32.
def L1toI64: OutPatFrag<(ops node:$Rs), (ToSext64 (L1toI32 $Rs))>;

// Pattern fragment matching an extload from $ptr, restricted by the C++
// predicate below to loads whose memory type (getMemoryVT) is MVT::v2i8.
def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8;
}]>;
Expand Down Expand Up @@ -2104,11 +2107,17 @@ let AddedComplexity = 20 in {
}

let AddedComplexity = 30 in {
// Loads of i1 are loading a byte, and the byte should be either 0 or 1.
// It doesn't matter if it's sign- or zero-extended, so use zero-extension
// everywhere.
defm: Loadxim_pat<sextloadi1, i32, L1toI32, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<extloadi1, i64, ToAext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<sextloadi1, i64, L1toI64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;

defm: Loadxim_pat<extloadi8, i64, ToAext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<extloadi16, i64, ToAext64, anyimm1, L2_loadruh_io>;
defm: Loadxim_pat<extloadi32, i64, ToAext64, anyimm2, L2_loadri_io>;
defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
Expand All @@ -2118,6 +2127,7 @@ let AddedComplexity = 30 in {
}

let AddedComplexity = 60 in {
def: Loadxu_pat<extloadi1, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<extloadi8, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<extloadi16, i32, anyimm1, L4_loadruh_ur>;
def: Loadxu_pat<extloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
Expand All @@ -2126,6 +2136,7 @@ let AddedComplexity = 60 in {
def: Loadxu_pat<sextloadi16, i32, anyimm1, L4_loadrh_ur>;
def: Loadxu_pat<sextloadv2i8, v2i16, anyimm1, L4_loadbsw2_ur>;
def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
def: Loadxu_pat<zextloadi1, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<zextloadi8, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>;
def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
Expand All @@ -2140,6 +2151,11 @@ let AddedComplexity = 60 in {
def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;

def: Loadxum_pat<sextloadi1, i32, anyimm0, L1toI32, L4_loadrub_ur>;
def: Loadxum_pat<extloadi1, i64, anyimm0, ToAext64, L4_loadrub_ur>;
def: Loadxum_pat<sextloadi1, i64, anyimm0, L1toI64, L4_loadrub_ur>;
def: Loadxum_pat<zextloadi1, i64, anyimm0, ToZext64, L4_loadrub_ur>;

def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
def: Loadxum_pat<extloadi8, i64, anyimm0, ToAext64, L4_loadrub_ur>;
Expand All @@ -2152,7 +2168,9 @@ let AddedComplexity = 60 in {
}

let AddedComplexity = 40 in {
def: Loadxr_shl_pat<extloadi1, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<zextloadi1, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
Expand All @@ -2170,8 +2188,10 @@ let AddedComplexity = 40 in {
}

let AddedComplexity = 20 in {
def: Loadxr_add_pat<extloadi1, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<zextloadi1, i32, L4_loadrub_rr>;
def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
Expand All @@ -2188,6 +2208,11 @@ let AddedComplexity = 20 in {
}

let AddedComplexity = 40 in {
def: Loadxrm_shl_pat<sextloadi1, i32, L1toI32, L4_loadrub_rr>;
def: Loadxrm_shl_pat<extloadi1, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<sextloadi1, i64, L1toI64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<zextloadi1, i64, ToZext64, L4_loadrub_rr>;

def: Loadxrm_shl_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
Expand All @@ -2199,7 +2224,12 @@ let AddedComplexity = 40 in {
def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}

let AddedComplexity = 20 in {
let AddedComplexity = 30 in {
def: Loadxrm_add_pat<sextloadi1, i32, L1toI32, L4_loadrub_rr>;
def: Loadxrm_add_pat<extloadi1, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<sextloadi1, i64, L1toI64, L4_loadrub_rr>;
def: Loadxrm_add_pat<zextloadi1, i64, ToZext64, L4_loadrub_rr>;

def: Loadxrm_add_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
Expand All @@ -2214,12 +2244,13 @@ let AddedComplexity = 20 in {
// Absolute address

let AddedComplexity = 60 in {
def: Loada_pat<extloadi1, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>;
def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
def: Loada_pat<load, i32, anyimm2, PS_loadriabs>;
def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>;
Expand All @@ -2238,6 +2269,12 @@ let AddedComplexity = 60 in {
}

let AddedComplexity = 30 in {
def: Loadam_pat<load, i1, anyimm0, I32toI1, PS_loadrubabs>;
def: Loadam_pat<sextloadi1, i32, anyimm0, L1toI32, PS_loadrubabs>;
def: Loadam_pat<extloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>;
def: Loadam_pat<sextloadi1, i64, anyimm0, L1toI64, PS_loadrubabs>;
def: Loadam_pat<zextloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>;

def: Loadam_pat<extloadi8, i64, anyimm0, ToAext64, PS_loadrubabs>;
def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>;
def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
Expand All @@ -2247,9 +2284,6 @@ let AddedComplexity = 30 in {
def: Loadam_pat<extloadi32, i64, anyimm2, ToAext64, PS_loadriabs>;
def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>;
def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;

def: Loadam_pat<load, i1, anyimm0, I32toI1, PS_loadrubabs>;
def: Loadam_pat<zextloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>;
}

// GP-relative address
Expand Down Expand Up @@ -2280,6 +2314,11 @@ let AddedComplexity = 100 in {
}

let AddedComplexity = 70 in {
def: Loadam_pat<sextloadi1, i32, addrgp, L1toI32, L2_loadrubgp>;
def: Loadam_pat<extloadi1, i64, addrgp, ToAext64, L2_loadrubgp>;
def: Loadam_pat<sextloadi1, i64, addrgp, L1toI64, L2_loadrubgp>;
def: Loadam_pat<zextloadi1, i64, addrgp, ToZext64, L2_loadrubgp>;

def: Loadam_pat<extloadi8, i64, addrgp, ToAext64, L2_loadrubgp>;
def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>;
def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
Expand All @@ -2291,17 +2330,8 @@ let AddedComplexity = 70 in {
def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>;

def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
def: Loadam_pat<zextloadi1, i64, addrgp, ToZext64, L2_loadrubgp>;
}


// Sign-extending loads of i1 need to replicate the lowest bit throughout
// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
// do the trick.
let AddedComplexity = 20 in
def: Pat<(i32 (sextloadi1 I32:$Rs)),
(A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;

// Patterns for loads of i1:
// An i1 load from a frame-index address (AddrFI) is selected as
// L2_loadrub_io from that frame index at offset 0, and the loaded value is
// then passed through C2_tfrrp to produce the i1 result.
def: Pat<(i1 (load AddrFI:$fi)),
(C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/CodeGen/Hexagon/isel-extload-i1.ll
@@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon < %s | FileCheck %s

target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"

; Sign-extend the i1 argument %a4 to i64 and return it. The autogenerated
; assertions below expect the value to be read with an unsigned byte load
; from r29+#0, negated with sub(#0,r0) so that 0/1 becomes 0/-1, and the
; high word r1 to be produced by an arithmetic shift right of r0 by 31.
define i64 @f0(i32 %a0, i64 %a1, i32 %a2, i32 %a3, i1 zeroext %a4) #0 {
; CHECK-LABEL: f0:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: r0 = memub(r29+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = sub(#0,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1 = asr(r0,#31)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = sext i1 %a4 to i64
ret i64 %v0
}

attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+v66,-long-calls" }

0 comments on commit 9c5ed8d

Please sign in to comment.