diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll index 4b551fad5b43a..ca22e44d1c97f 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll @@ -8,558 +8,74 @@ define amdgpu_kernel void @extract_insert_chain_to_shuffles(<16 x i8> %in, <16 x ; OPT-LABEL: define amdgpu_kernel void @extract_insert_chain_to_shuffles( ; OPT-SAME: <16 x i8> [[IN:%.*]], <16 x i8> [[ADD:%.*]], ptr addrspace(3) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { ; OPT-NEXT: [[ENTRY:.*:]] -; OPT-NEXT: [[ALLOCA:%.*]] = freeze <128 x i8> poison -; OPT-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> -; OPT-NEXT: [[TMP1:%.*]] = shufflevector <128 x i8> [[ALLOCA]], <128 x i8> [[TMP0]], <128 x i32> -; OPT-NEXT: [[TMP2:%.*]] = extractelement <16 x i8> [[IN]], i64 1 -; OPT-NEXT: [[TMP3:%.*]] = insertelement <128 x i8> [[TMP1]], i8 [[TMP2]], i32 1 -; OPT-NEXT: [[TMP4:%.*]] = extractelement <16 x i8> [[IN]], i64 2 -; OPT-NEXT: [[TMP5:%.*]] = insertelement <128 x i8> [[TMP3]], i8 [[TMP4]], i32 2 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <16 x i8> [[IN]], i64 3 -; OPT-NEXT: [[TMP7:%.*]] = insertelement <128 x i8> [[TMP5]], i8 [[TMP6]], i32 3 -; OPT-NEXT: [[TMP8:%.*]] = extractelement <16 x i8> [[IN]], i64 4 -; OPT-NEXT: [[TMP9:%.*]] = insertelement <128 x i8> [[TMP7]], i8 [[TMP8]], i32 4 -; OPT-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[IN]], i64 5 -; OPT-NEXT: [[TMP11:%.*]] = insertelement <128 x i8> [[TMP9]], i8 [[TMP10]], i32 5 -; OPT-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[IN]], i64 6 -; OPT-NEXT: [[TMP13:%.*]] = insertelement <128 x i8> [[TMP11]], i8 [[TMP12]], i32 6 -; OPT-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[IN]], i64 7 -; OPT-NEXT: [[TMP15:%.*]] = insertelement <128 x i8> [[TMP13]], i8 [[TMP14]], i32 7 -; OPT-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[IN]], i64 8 -; OPT-NEXT: [[TMP17:%.*]] = insertelement <128 x i8> [[TMP15]], i8 [[TMP16]], i32 8 -; OPT-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[IN]], i64 9 -; OPT-NEXT: [[TMP19:%.*]] = insertelement <128 x i8> [[TMP17]], i8 [[TMP18]], i32 9 -; OPT-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[IN]], i64 10 -; OPT-NEXT: [[TMP21:%.*]] = insertelement <128 x i8> [[TMP19]], i8 [[TMP20]], i32 10 -; OPT-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[IN]], i64 11 -; OPT-NEXT: [[TMP23:%.*]] = insertelement <128 x i8> [[TMP21]], i8 [[TMP22]], i32 11 -; OPT-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[IN]], i64 12 -; OPT-NEXT: [[TMP25:%.*]] = insertelement <128 x i8> [[TMP23]], i8 [[TMP24]], i32 12 -; OPT-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[IN]], i64 13 -; OPT-NEXT: [[TMP27:%.*]] = insertelement <128 x i8> [[TMP25]], i8 [[TMP26]], i32 13 -; OPT-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[IN]], i64 14 -; OPT-NEXT: [[TMP29:%.*]] = insertelement <128 x i8> [[TMP27]], i8 [[TMP28]], i32 14 -; OPT-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[IN]], i64 15 -; OPT-NEXT: [[TMP31:%.*]] = insertelement <128 x i8> [[TMP29]], i8 [[TMP30]], i32 15 -; OPT-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> -; OPT-NEXT: [[TMP33:%.*]] = shufflevector <128 x i8> [[TMP31]], <128 x i8> [[TMP32]], <128 x i32> -; OPT-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[IN]], i64 1 -; OPT-NEXT: [[TMP35:%.*]] = insertelement <128 x i8> [[TMP33]], i8 [[TMP34]], i32 17 -; OPT-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[IN]], i64 2 -; OPT-NEXT: [[TMP37:%.*]] = insertelement <128 x i8> [[TMP35]], i8 [[TMP36]], i32 18 -; OPT-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[IN]], i64 3 -; OPT-NEXT: [[TMP39:%.*]] = insertelement <128 x i8> [[TMP37]], i8 [[TMP38]], i32 19 -; OPT-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[IN]], i64 4 -; OPT-NEXT: [[TMP41:%.*]] = insertelement <128 x i8> [[TMP39]], i8 [[TMP40]], i32 20 -; OPT-NEXT: [[TMP42:%.*]] = extractelement <16 x i8> [[IN]], i64 5 -; OPT-NEXT: [[TMP43:%.*]] = insertelement <128 x i8> [[TMP41]], i8 [[TMP42]], i32 21 -; OPT-NEXT: [[TMP44:%.*]] = extractelement <16 x i8> [[IN]], i64 6 -; OPT-NEXT: [[TMP45:%.*]] = insertelement <128 x i8> [[TMP43]], i8 [[TMP44]], i32 22 -; OPT-NEXT: [[TMP46:%.*]] = extractelement <16 x i8> [[IN]], i64 7 -; OPT-NEXT: [[TMP47:%.*]] = insertelement <128 x i8> [[TMP45]], i8 [[TMP46]], i32 23 -; OPT-NEXT: [[TMP48:%.*]] = extractelement <16 x i8> [[IN]], i64 8 -; OPT-NEXT: [[TMP49:%.*]] = insertelement <128 x i8> [[TMP47]], i8 [[TMP48]], i32 24 -; OPT-NEXT: [[TMP50:%.*]] = extractelement <16 x i8> [[IN]], i64 9 -; OPT-NEXT: [[TMP51:%.*]] = insertelement <128 x i8> [[TMP49]], i8 [[TMP50]], i32 25 -; OPT-NEXT: [[TMP52:%.*]] = extractelement <16 x i8> [[IN]], i64 10 -; OPT-NEXT: [[TMP53:%.*]] = insertelement <128 x i8> [[TMP51]], i8 [[TMP52]], i32 26 -; OPT-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[IN]], i64 11 -; OPT-NEXT: [[TMP55:%.*]] = insertelement <128 x i8> [[TMP53]], i8 [[TMP54]], i32 27 -; OPT-NEXT: [[TMP56:%.*]] = extractelement <16 x i8> [[IN]], i64 12 -; OPT-NEXT: [[TMP57:%.*]] = insertelement <128 x i8> [[TMP55]], i8 [[TMP56]], i32 28 -; OPT-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[IN]], i64 13 -; OPT-NEXT: [[TMP59:%.*]] = insertelement <128 x i8> [[TMP57]], i8 [[TMP58]], i32 29 -; OPT-NEXT: [[TMP60:%.*]] = extractelement <16 x i8> [[IN]], i64 14 -; OPT-NEXT: [[TMP61:%.*]] = insertelement <128 x i8> [[TMP59]], i8 [[TMP60]], i32 30 -; OPT-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[IN]], i64 15 -; OPT-NEXT: [[TMP63:%.*]] = insertelement <128 x i8> [[TMP61]], i8 [[TMP62]], i32 31 -; OPT-NEXT: [[TMP64:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> -; OPT-NEXT: [[TMP65:%.*]] = shufflevector <128 x i8> [[TMP63]], <128 x i8> [[TMP64]], <128 x i32> -; OPT-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[IN]], i64 1 -; OPT-NEXT: [[TMP67:%.*]] = insertelement <128 x i8> [[TMP65]], i8 [[TMP66]], i32 33 -; OPT-NEXT: [[TMP68:%.*]] = extractelement <16 x i8> [[IN]], i64 2 -; OPT-NEXT: [[TMP69:%.*]] = insertelement <128 x i8> [[TMP67]], i8 [[TMP68]], i32 34 -; OPT-NEXT: [[TMP70:%.*]] = extractelement <16 x i8> [[IN]], i64 3 -; OPT-NEXT: [[TMP71:%.*]] = insertelement <128 x i8> [[TMP69]], i8 [[TMP70]], i32 35 -; OPT-NEXT: [[TMP72:%.*]] = extractelement <16 x i8> [[IN]], i64 4 -; OPT-NEXT: [[TMP73:%.*]] = insertelement <128 x i8> [[TMP71]], i8 [[TMP72]], i32 36 -; OPT-NEXT: [[TMP74:%.*]] = extractelement <16 x i8> [[IN]], i64 5 -; OPT-NEXT: [[TMP75:%.*]] = insertelement <128 x i8> [[TMP73]], i8 [[TMP74]], i32 37 -; OPT-NEXT: [[TMP76:%.*]] = extractelement <16 x i8> [[IN]], i64 6 -; OPT-NEXT: [[TMP77:%.*]] = insertelement <128 x i8> [[TMP75]], i8 [[TMP76]], i32 38 -; OPT-NEXT: [[TMP78:%.*]] = extractelement <16 x i8> [[IN]], i64 7 -; OPT-NEXT: [[TMP79:%.*]] = insertelement <128 x i8> [[TMP77]], i8 [[TMP78]], i32 39 -; OPT-NEXT: [[TMP80:%.*]] = extractelement <16 x i8> [[IN]], i64 8 -; OPT-NEXT: [[TMP81:%.*]] = insertelement <128 x i8> [[TMP79]], i8 [[TMP80]], i32 40 -; OPT-NEXT: [[TMP82:%.*]] = extractelement <16 x i8> [[IN]], i64 9 -; OPT-NEXT: [[TMP83:%.*]] = insertelement <128 x i8> [[TMP81]], i8 [[TMP82]], i32 41 -; OPT-NEXT: [[TMP84:%.*]] = extractelement <16 x i8> [[IN]], i64 10 -; OPT-NEXT: [[TMP85:%.*]] = insertelement <128 x i8> [[TMP83]], i8 [[TMP84]], i32 42 -; OPT-NEXT: [[TMP86:%.*]] = extractelement <16 x i8> [[IN]], i64 11 -; OPT-NEXT: [[TMP87:%.*]] = insertelement <128 x i8> [[TMP85]], i8 [[TMP86]], i32 43 -; OPT-NEXT: [[TMP88:%.*]] = extractelement <16 x i8> [[IN]], i64 12 -; OPT-NEXT: [[TMP89:%.*]] = insertelement <128 x i8> [[TMP87]], i8 [[TMP88]], i32 44 -; OPT-NEXT: [[TMP90:%.*]] = extractelement <16 x i8> [[IN]], i64 13 -; OPT-NEXT: [[TMP91:%.*]] = insertelement <128 x i8> [[TMP89]], i8 [[TMP90]], i32 45 -; OPT-NEXT: [[TMP92:%.*]] = extractelement <16 x i8> [[IN]], i64 14 -; OPT-NEXT: [[TMP93:%.*]] = insertelement <128 x i8> [[TMP91]], i8 [[TMP92]], i32 46 -; OPT-NEXT: [[TMP94:%.*]] = extractelement <16 x i8> [[IN]], i64 15 -; OPT-NEXT: [[TMP95:%.*]] = insertelement <128 x i8> [[TMP93]], i8 [[TMP94]], i32 47 -; OPT-NEXT: [[TMP96:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> -; OPT-NEXT: [[TMP97:%.*]] = shufflevector <128 x i8> [[TMP95]], <128 x i8> [[TMP96]], <128 x i32> -; OPT-NEXT: [[TMP98:%.*]] = extractelement <16 x i8> [[IN]], i64 1 -; OPT-NEXT: [[TMP99:%.*]] = insertelement <128 x i8> [[TMP97]], i8 [[TMP98]], i32 49 -; OPT-NEXT: [[TMP100:%.*]] = extractelement <16 x i8> [[IN]], i64 2 -; OPT-NEXT: [[TMP101:%.*]] = insertelement <128 x i8> [[TMP99]], i8 [[TMP100]], i32 50 -; OPT-NEXT: [[TMP102:%.*]] = extractelement <16 x i8> [[IN]], i64 3 -; OPT-NEXT: [[TMP103:%.*]] = insertelement <128 x i8> [[TMP101]], i8 [[TMP102]], i32 51 -; OPT-NEXT: [[TMP104:%.*]] = extractelement <16 x i8> [[IN]], i64 4 -; OPT-NEXT: [[TMP105:%.*]] = insertelement <128 x i8> [[TMP103]], i8 [[TMP104]], i32 52 -; OPT-NEXT: [[TMP106:%.*]] = extractelement <16 x i8> [[IN]], i64 5 -; OPT-NEXT: [[TMP107:%.*]] = insertelement <128 x i8> [[TMP105]], i8 [[TMP106]], i32 53 -; OPT-NEXT: [[TMP108:%.*]] = extractelement <16 x i8> [[IN]], i64 6 -; OPT-NEXT: [[TMP109:%.*]] = insertelement <128 x i8> [[TMP107]], i8 [[TMP108]], i32 54 -; OPT-NEXT: [[TMP110:%.*]] = extractelement <16 x i8> [[IN]], i64 7 -; OPT-NEXT: [[TMP111:%.*]] = insertelement <128 x i8> [[TMP109]], i8 [[TMP110]], i32 55 -; OPT-NEXT: [[TMP112:%.*]] = extractelement <16 x i8> [[IN]], i64 8 -; OPT-NEXT: [[TMP113:%.*]] = insertelement <128 x i8> [[TMP111]], i8 [[TMP112]], i32 56 -; OPT-NEXT: [[TMP114:%.*]] = extractelement <16 x i8> [[IN]], i64 9 -; OPT-NEXT: [[TMP115:%.*]] = insertelement <128 x i8> [[TMP113]], i8 [[TMP114]], i32 57 -; OPT-NEXT: [[TMP116:%.*]] = extractelement <16 x i8> [[IN]], i64 10 -; OPT-NEXT: [[TMP117:%.*]] = insertelement <128 x i8> [[TMP115]], i8 [[TMP116]], i32 58 -; OPT-NEXT: [[TMP118:%.*]] = extractelement <16 x i8> [[IN]], i64 11 -; OPT-NEXT: [[TMP119:%.*]] = insertelement <128 x i8> [[TMP117]], i8 [[TMP118]], i32 59 -; OPT-NEXT: [[TMP120:%.*]] = extractelement <16 x i8> [[IN]], i64 12 -; OPT-NEXT: [[TMP121:%.*]] = insertelement <128 x i8> [[TMP119]], i8 [[TMP120]], i32 60 -; OPT-NEXT: [[TMP122:%.*]] = extractelement <16 x i8> [[IN]], i64 13 -; OPT-NEXT: [[TMP123:%.*]] = insertelement <128 x i8> [[TMP121]], i8 [[TMP122]], i32 61 -; OPT-NEXT: [[TMP124:%.*]] = extractelement <16 x i8> [[IN]], i64 14 -; OPT-NEXT: [[TMP125:%.*]] = insertelement <128 x i8> [[TMP123]], i8 [[TMP124]], i32 62 -; OPT-NEXT: [[TMP126:%.*]] = extractelement <16 x i8> [[IN]], i64 15 -; OPT-NEXT: [[TMP127:%.*]] = insertelement <128 x i8> [[TMP125]], i8 [[TMP126]], i32 63 -; OPT-NEXT: [[TMP128:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> -; OPT-NEXT: [[TMP129:%.*]] = shufflevector <128 x i8> [[TMP127]], <128 x i8> [[TMP128]], <128 x i32> -; OPT-NEXT: [[TMP130:%.*]] = extractelement <16 x i8> [[IN]], i64 1 -; OPT-NEXT: [[TMP131:%.*]] = insertelement <128 x i8> [[TMP129]], i8 [[TMP130]], i32 65 -; OPT-NEXT: [[TMP132:%.*]] = extractelement <16 x i8> [[IN]], i64 2 -; OPT-NEXT: [[TMP133:%.*]] = insertelement <128 x i8> [[TMP131]], i8 [[TMP132]], i32 66 -; OPT-NEXT: [[TMP134:%.*]] = extractelement <16 x i8> [[IN]], i64 3 -; OPT-NEXT: [[TMP135:%.*]] = insertelement <128 x i8> [[TMP133]], i8 [[TMP134]], i32 67 -; OPT-NEXT: [[TMP136:%.*]] = extractelement <16 x i8> [[IN]], i64 4 -; OPT-NEXT: [[TMP137:%.*]] = insertelement <128 x i8> [[TMP135]], i8 [[TMP136]], i32 68 -; OPT-NEXT: [[TMP138:%.*]] = extractelement <16 x i8> [[IN]], i64 5 -; OPT-NEXT: [[TMP139:%.*]] = insertelement <128 x i8> [[TMP137]], i8 [[TMP138]], i32 69 -; OPT-NEXT: [[TMP140:%.*]] = extractelement <16 x i8> [[IN]], i64 6 -; OPT-NEXT: [[TMP141:%.*]] = insertelement <128 x i8> [[TMP139]], i8 [[TMP140]], i32 70 -; OPT-NEXT: [[TMP142:%.*]] = extractelement <16 x i8> [[IN]], i64 7 -; OPT-NEXT: [[TMP143:%.*]] = insertelement <128 x i8> [[TMP141]], i8 [[TMP142]], i32 71 -; OPT-NEXT: [[TMP144:%.*]] = extractelement <16 x i8> [[IN]], i64 8 -; OPT-NEXT: [[TMP145:%.*]] = insertelement <128 x i8> [[TMP143]], i8 [[TMP144]], i32 72 -; OPT-NEXT: [[TMP146:%.*]] = extractelement <16 x i8> [[IN]], i64 9 -; OPT-NEXT: [[TMP147:%.*]] = insertelement <128 x i8> [[TMP145]], i8 [[TMP146]], i32 73 -; OPT-NEXT: [[TMP148:%.*]] = extractelement <16 x i8> [[IN]], i64 10 -; OPT-NEXT: [[TMP149:%.*]] = insertelement <128 x i8> [[TMP147]], i8 [[TMP148]], i32 74 -; OPT-NEXT: [[TMP150:%.*]] = extractelement <16 x i8> [[IN]], i64 11 -; OPT-NEXT: [[TMP151:%.*]] = insertelement <128 x i8> [[TMP149]], i8 [[TMP150]], i32 75 -; OPT-NEXT: [[TMP152:%.*]] = extractelement <16 x i8> [[IN]], i64 12 -; OPT-NEXT: [[TMP153:%.*]] = insertelement <128 x i8> [[TMP151]], i8 [[TMP152]], i32 76 -; OPT-NEXT: [[TMP154:%.*]] = extractelement <16 x i8> [[IN]], i64 13 -; OPT-NEXT: [[TMP155:%.*]] = insertelement <128 x i8> [[TMP153]], i8 [[TMP154]], i32 77 -; OPT-NEXT: [[TMP156:%.*]] = extractelement <16 x i8> [[IN]], i64 14 -; OPT-NEXT: [[TMP157:%.*]] = insertelement <128 x i8> [[TMP155]], i8 [[TMP156]], i32 78 -; OPT-NEXT: [[TMP158:%.*]] = extractelement <16 x i8> [[IN]], i64 15 -; OPT-NEXT: [[TMP159:%.*]] = insertelement <128 x i8> [[TMP157]], i8 [[TMP158]], i32 79 -; OPT-NEXT: [[TMP160:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> -; OPT-NEXT: [[TMP161:%.*]] = shufflevector <128 x i8> [[TMP159]], <128 x i8> [[TMP160]], <128 x i32> -; OPT-NEXT: [[TMP162:%.*]] = extractelement <16 x i8> [[IN]], i64 1 -; OPT-NEXT: [[TMP163:%.*]] = insertelement <128 x i8> [[TMP161]], i8 [[TMP162]], i32 81 -; OPT-NEXT: [[TMP164:%.*]] = extractelement <16 x i8> [[IN]], i64 2 -; OPT-NEXT: [[TMP165:%.*]] = insertelement <128 x i8> [[TMP163]], i8 [[TMP164]], i32 82 -; OPT-NEXT: [[TMP166:%.*]] = extractelement <16 x i8> [[IN]], i64 3 -; OPT-NEXT: [[TMP167:%.*]] = insertelement <128 x i8> [[TMP165]], i8 [[TMP166]], i32 83 -; OPT-NEXT: [[TMP168:%.*]] = extractelement <16 x i8> [[IN]], i64 4 -; OPT-NEXT: [[TMP169:%.*]] = insertelement <128 x i8> [[TMP167]], i8 [[TMP168]], i32 84 -; OPT-NEXT: [[TMP170:%.*]] = extractelement <16 x i8> [[IN]], i64 5 -; OPT-NEXT: [[TMP171:%.*]] = insertelement <128 x i8> [[TMP169]], i8 [[TMP170]], i32 85 -; OPT-NEXT: [[TMP172:%.*]] = extractelement <16 x i8> [[IN]], i64 6 -; OPT-NEXT: [[TMP173:%.*]] = insertelement <128 x i8> [[TMP171]], i8 [[TMP172]], i32 86 -; OPT-NEXT: [[TMP174:%.*]] = extractelement <16 x i8> [[IN]], i64 7 -; OPT-NEXT: [[TMP175:%.*]] = insertelement <128 x i8> [[TMP173]], i8 [[TMP174]], i32 87 -; OPT-NEXT: [[TMP176:%.*]] = extractelement <16 x i8> [[IN]], i64 8 -; OPT-NEXT: [[TMP177:%.*]] = insertelement <128 x i8> [[TMP175]], i8 [[TMP176]], i32 88 -; OPT-NEXT: [[TMP178:%.*]] = extractelement <16 x i8> [[IN]], i64 9 -; OPT-NEXT: [[TMP179:%.*]] = insertelement <128 x i8> [[TMP177]], i8 [[TMP178]], i32 89 -; OPT-NEXT: [[TMP180:%.*]] = extractelement <16 x i8> [[IN]], i64 10 -; OPT-NEXT: [[TMP181:%.*]] = insertelement <128 x i8> [[TMP179]], i8 [[TMP180]], i32 90 -; OPT-NEXT: [[TMP182:%.*]] = extractelement <16 x i8> [[IN]], i64 11 -; OPT-NEXT: [[TMP183:%.*]] = insertelement <128 x i8> [[TMP181]], i8 [[TMP182]], i32 91 -; OPT-NEXT: [[TMP184:%.*]] = extractelement <16 x i8> [[IN]], i64 12 -; OPT-NEXT: [[TMP185:%.*]] = insertelement <128 x i8> [[TMP183]], i8 [[TMP184]], i32 92 -; OPT-NEXT: [[TMP186:%.*]] = extractelement <16 x i8> [[IN]], i64 13 -; OPT-NEXT: [[TMP187:%.*]] = insertelement <128 x i8> [[TMP185]], i8 [[TMP186]], i32 93 -; OPT-NEXT: [[TMP188:%.*]] = extractelement <16 x i8> [[IN]], i64 14 -; OPT-NEXT: [[TMP189:%.*]] = insertelement <128 x i8> [[TMP187]], i8 [[TMP188]], i32 94 -; OPT-NEXT: [[TMP190:%.*]] = extractelement <16 x i8> [[IN]], i64 15 -; OPT-NEXT: [[TMP191:%.*]] = insertelement <128 x i8> [[TMP189]], i8 [[TMP190]], i32 95 -; OPT-NEXT: [[TMP192:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> -; OPT-NEXT: [[TMP193:%.*]] = shufflevector <128 x i8> [[TMP191]], <128 x i8> [[TMP192]], <128 x i32> -; OPT-NEXT: [[TMP194:%.*]] = extractelement <16 x i8> [[IN]], i64 1 -; OPT-NEXT: [[TMP195:%.*]] = insertelement <128 x i8> [[TMP193]], i8 [[TMP194]], i32 97 -; OPT-NEXT: [[TMP196:%.*]] = extractelement <16 x i8> [[IN]], i64 2 -; OPT-NEXT: [[TMP197:%.*]] = insertelement <128 x i8> [[TMP195]], i8 [[TMP196]], i32 98 -; OPT-NEXT: [[TMP198:%.*]] = extractelement <16 x i8> [[IN]], i64 3 -; OPT-NEXT: [[TMP199:%.*]] = insertelement <128 x i8> [[TMP197]], i8 [[TMP198]], i32 99 -; OPT-NEXT: [[TMP200:%.*]] = extractelement <16 x i8> [[IN]], i64 4 -; OPT-NEXT: [[TMP201:%.*]] = insertelement <128 x i8> [[TMP199]], i8 [[TMP200]], i32 100 -; OPT-NEXT: [[TMP202:%.*]] = extractelement <16 x i8> [[IN]], i64 5 -; OPT-NEXT: [[TMP203:%.*]] = insertelement <128 x i8> [[TMP201]], i8 [[TMP202]], i32 101 -; OPT-NEXT: [[TMP204:%.*]] = extractelement <16 x i8> [[IN]], i64 6 -; OPT-NEXT: [[TMP205:%.*]] = insertelement <128 x i8> [[TMP203]], i8 [[TMP204]], i32 102 -; OPT-NEXT: [[TMP206:%.*]] = extractelement <16 x i8> [[IN]], i64 7 -; OPT-NEXT: [[TMP207:%.*]] = insertelement <128 x i8> [[TMP205]], i8 [[TMP206]], i32 103 -; OPT-NEXT: [[TMP208:%.*]] = extractelement <16 x i8> [[IN]], i64 8 -; OPT-NEXT: [[TMP209:%.*]] = insertelement <128 x i8> [[TMP207]], i8 [[TMP208]], i32 104 -; OPT-NEXT: [[TMP210:%.*]] = extractelement <16 x i8> [[IN]], i64 9 -; OPT-NEXT: [[TMP211:%.*]] = insertelement <128 x i8> [[TMP209]], i8 [[TMP210]], i32 105 -; OPT-NEXT: [[TMP212:%.*]] = extractelement <16 x i8> [[IN]], i64 10 -; OPT-NEXT: [[TMP213:%.*]] = insertelement <128 x i8> [[TMP211]], i8 [[TMP212]], i32 106 -; OPT-NEXT: [[TMP214:%.*]] = extractelement <16 x i8> [[IN]], i64 11 -; OPT-NEXT: [[TMP215:%.*]] = insertelement <128 x i8> [[TMP213]], i8 [[TMP214]], i32 107 -; OPT-NEXT: [[TMP216:%.*]] = extractelement <16 x i8> [[IN]], i64 12 -; OPT-NEXT: [[TMP217:%.*]] = insertelement <128 x i8> [[TMP215]], i8 [[TMP216]], i32 108 -; OPT-NEXT: [[TMP218:%.*]] = extractelement <16 x i8> [[IN]], i64 13 -; OPT-NEXT: [[TMP219:%.*]] = insertelement <128 x i8> [[TMP217]], i8 [[TMP218]], i32 109 -; OPT-NEXT: [[TMP220:%.*]] = extractelement <16 x i8> [[IN]], i64 14 -; OPT-NEXT: [[TMP221:%.*]] = insertelement <128 x i8> [[TMP219]], i8 [[TMP220]], i32 110 -; OPT-NEXT: [[TMP222:%.*]] = extractelement <16 x i8> [[IN]], i64 15 -; OPT-NEXT: [[TMP223:%.*]] = insertelement <128 x i8> [[TMP221]], i8 [[TMP222]], i32 111 -; OPT-NEXT: [[TMP224:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <128 x i32> -; OPT-NEXT: [[TMP225:%.*]] = shufflevector <128 x i8> [[TMP223]], <128 x i8> [[TMP224]], <128 x i32> ; OPT-NEXT: [[TMP226:%.*]] = extractelement <16 x i8> [[IN]], i64 1 -; OPT-NEXT: [[TMP227:%.*]] = insertelement <128 x i8> [[TMP225]], i8 [[TMP226]], i32 113 ; OPT-NEXT: [[TMP228:%.*]] = extractelement <16 x i8> [[IN]], i64 2 -; OPT-NEXT: [[TMP229:%.*]] = insertelement <128 x i8> [[TMP227]], i8 [[TMP228]], i32 114 ; OPT-NEXT: [[TMP230:%.*]] = extractelement <16 x i8> [[IN]], i64 3 -; OPT-NEXT: [[TMP231:%.*]] = insertelement <128 x i8> [[TMP229]], i8 [[TMP230]], i32 115 ; OPT-NEXT: [[TMP232:%.*]] = extractelement <16 x i8> [[IN]], i64 4 -; OPT-NEXT: [[TMP233:%.*]] = insertelement <128 x i8> [[TMP231]], i8 [[TMP232]], i32 116 ; OPT-NEXT: [[TMP234:%.*]] = extractelement <16 x i8> [[IN]], i64 5 -; OPT-NEXT: [[TMP235:%.*]] = insertelement <128 x i8> [[TMP233]], i8 [[TMP234]], i32 117 ; OPT-NEXT: [[TMP236:%.*]] = extractelement <16 x i8> [[IN]], i64 6 -; OPT-NEXT: [[TMP237:%.*]] = insertelement <128 x i8> [[TMP235]], i8 [[TMP236]], i32 118 ; OPT-NEXT: [[TMP238:%.*]] = extractelement <16 x i8> [[IN]], i64 7 -; OPT-NEXT: [[TMP239:%.*]] = insertelement <128 x i8> [[TMP237]], i8 [[TMP238]], i32 119 ; OPT-NEXT: [[TMP240:%.*]] = extractelement <16 x i8> [[IN]], i64 8 -; OPT-NEXT: [[TMP241:%.*]] = insertelement <128 x i8> [[TMP239]], i8 [[TMP240]], i32 120 ; OPT-NEXT: [[TMP242:%.*]] = extractelement <16 x i8> [[IN]], i64 9 -; OPT-NEXT: [[TMP243:%.*]] = insertelement <128 x i8> [[TMP241]], i8 [[TMP242]], i32 121 ; OPT-NEXT: [[TMP244:%.*]] = extractelement <16 x i8> [[IN]], i64 10 -; OPT-NEXT: [[TMP245:%.*]] = insertelement <128 x i8> [[TMP243]], i8 [[TMP244]], i32 122 ; OPT-NEXT: [[TMP246:%.*]] = extractelement <16 x i8> [[IN]], i64 11 -; OPT-NEXT: [[TMP247:%.*]] = insertelement <128 x i8> [[TMP245]], i8 [[TMP246]], i32 123 ; OPT-NEXT: [[TMP248:%.*]] = extractelement <16 x i8> [[IN]], i64 12 -; OPT-NEXT: [[TMP249:%.*]] = insertelement <128 x i8> [[TMP247]], i8 [[TMP248]], i32 124 ; OPT-NEXT: [[TMP250:%.*]] = extractelement <16 x i8> [[IN]], i64 13 -; OPT-NEXT: [[TMP251:%.*]] = insertelement <128 x i8> [[TMP249]], i8 [[TMP250]], i32 125 ; OPT-NEXT: [[TMP252:%.*]] = extractelement <16 x i8> [[IN]], i64 14 -; OPT-NEXT: [[TMP253:%.*]] = insertelement <128 x i8> [[TMP251]], i8 [[TMP252]], i32 126 -; OPT-NEXT: [[TMP254:%.*]] = extractelement <16 x i8> [[IN]], i64 15 -; OPT-NEXT: [[TMP255:%.*]] = insertelement <128 x i8> [[TMP253]], i8 [[TMP254]], i32 127 ; OPT-NEXT: [[TMP256:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <16 x i32> -; OPT-NEXT: [[TMP257:%.*]] = insertelement <16 x i8> [[TMP256]], i8 [[TMP162]], i64 1 -; OPT-NEXT: [[TMP258:%.*]] = insertelement <16 x i8> [[TMP257]], i8 [[TMP164]], i64 2 -; OPT-NEXT: [[TMP259:%.*]] = insertelement <16 x i8> [[TMP258]], i8 [[TMP166]], i64 3 -; OPT-NEXT: [[TMP260:%.*]] = insertelement <16 x i8> [[TMP259]], i8 [[TMP168]], i64 4 -; OPT-NEXT: [[TMP261:%.*]] = insertelement <16 x i8> [[TMP260]], i8 [[TMP170]], i64 5 -; OPT-NEXT: [[TMP262:%.*]] = insertelement <16 x i8> [[TMP261]], i8 [[TMP172]], i64 6 -; OPT-NEXT: [[TMP263:%.*]] = insertelement <16 x i8> [[TMP262]], i8 [[TMP174]], i64 7 -; OPT-NEXT: [[TMP264:%.*]] = insertelement <16 x i8> [[TMP263]], i8 [[TMP176]], i64 8 -; OPT-NEXT: [[TMP265:%.*]] = insertelement <16 x i8> [[TMP264]], i8 [[TMP178]], i64 9 -; OPT-NEXT: [[TMP266:%.*]] = insertelement <16 x i8> [[TMP265]], i8 [[TMP180]], i64 10 -; OPT-NEXT: [[TMP267:%.*]] = insertelement <16 x i8> [[TMP266]], i8 [[TMP182]], i64 11 -; OPT-NEXT: [[TMP268:%.*]] = insertelement <16 x i8> [[TMP267]], i8 [[TMP184]], i64 12 -; OPT-NEXT: [[TMP269:%.*]] = insertelement <16 x i8> [[TMP268]], i8 [[TMP186]], i64 13 -; OPT-NEXT: [[TMP270:%.*]] = insertelement <16 x i8> [[TMP269]], i8 [[TMP188]], i64 14 +; OPT-NEXT: [[I257:%.*]] = insertelement <16 x i8> [[TMP256]], i8 [[TMP226]], i64 1 +; OPT-NEXT: [[I258:%.*]] = insertelement <16 x i8> [[I257]], i8 [[TMP228]], i64 2 +; OPT-NEXT: [[I259:%.*]] = insertelement <16 x i8> [[I258]], i8 [[TMP230]], i64 3 +; OPT-NEXT: [[I260:%.*]] = insertelement <16 x i8> [[I259]], i8 [[TMP232]], i64 4 +; OPT-NEXT: [[I261:%.*]] = insertelement <16 x i8> [[I260]], i8 [[TMP234]], i64 5 +; OPT-NEXT: [[I262:%.*]] = insertelement <16 x i8> [[I261]], i8 [[TMP236]], i64 6 +; OPT-NEXT: [[I263:%.*]] = insertelement <16 x i8> [[I262]], i8 [[TMP238]], i64 7 +; OPT-NEXT: [[I264:%.*]] = insertelement <16 x i8> [[I263]], i8 [[TMP240]], i64 8 +; OPT-NEXT: [[I265:%.*]] = insertelement <16 x i8> [[I264]], i8 [[TMP242]], i64 9 +; OPT-NEXT: [[I266:%.*]] = insertelement <16 x i8> [[I265]], i8 [[TMP244]], i64 10 +; OPT-NEXT: [[I267:%.*]] = insertelement <16 x i8> [[I266]], i8 [[TMP246]], i64 11 +; OPT-NEXT: [[I268:%.*]] = insertelement <16 x i8> [[I267]], i8 [[TMP248]], i64 12 +; OPT-NEXT: [[I269:%.*]] = insertelement <16 x i8> [[I268]], i8 [[TMP250]], i64 13 +; OPT-NEXT: [[TMP270:%.*]] = insertelement <16 x i8> [[I269]], i8 [[TMP252]], i64 14 ; OPT-NEXT: [[TMP271:%.*]] = shufflevector <16 x i8> [[TMP270]], <16 x i8> [[IN]], <16 x i32> ; OPT-NEXT: [[SUM:%.*]] = add <16 x i8> [[TMP271]], [[ADD]] ; OPT-NEXT: store <16 x i8> [[SUM]], ptr addrspace(3) [[OUT]], align 16 ; OPT-NEXT: ret void ; entry: - %alloca = freeze <128 x i8> poison - %0 = extractelement <16 x i8> %in, i64 0 - %1 = insertelement <128 x i8> %alloca, i8 %0, i32 0 - %2 = extractelement <16 x i8> %in, i64 1 - %3 = insertelement <128 x i8> %1, i8 %2, i32 1 - %4 = extractelement <16 x i8> %in, i64 2 - %5 = insertelement <128 x i8> %3, i8 %4, i32 2 - %6 = extractelement <16 x i8> %in, i64 3 - %7 = insertelement <128 x i8> %5, i8 %6, i32 3 - %8 = extractelement <16 x i8> %in, i64 4 - %9 = insertelement <128 x i8> %7, i8 %8, i32 4 - %10 = extractelement <16 x i8> %in, i64 5 - %11 = insertelement <128 x i8> %9, i8 %10, i32 5 - %12 = extractelement <16 x i8> %in, i64 6 - %13 = insertelement <128 x i8> %11, i8 %12, i32 6 - %14 = extractelement <16 x i8> %in, i64 7 - %15 = insertelement <128 x i8> %13, i8 %14, i32 7 - %16 = extractelement <16 x i8> %in, i64 8 - %17 = insertelement <128 x i8> %15, i8 %16, i32 8 - %18 = extractelement <16 x i8> %in, i64 9 - %19 = insertelement <128 x i8> %17, i8 %18, i32 9 - %20 = extractelement <16 x i8> %in, i64 10 - %21 = insertelement <128 x i8> %19, i8 %20, i32 10 - %22 = extractelement <16 x i8> %in, i64 11 - %23 = insertelement <128 x i8> %21, i8 %22, i32 11 - %24 = extractelement <16 x i8> %in, i64 12 - %25 = insertelement <128 x i8> %23, i8 %24, i32 12 - %26 = extractelement <16 x i8> %in, i64 13 - %27 = insertelement <128 x i8> %25, i8 %26, i32 13 - %28 = extractelement <16 x i8> %in, i64 14 - %29 = insertelement <128 x i8> %27, i8 %28, i32 14 - %30 = extractelement <16 x i8> %in, i64 15 - %31 = insertelement <128 x i8> %29, i8 %30, i32 15 - %32 = extractelement <16 x i8> %in, i64 0 - %33 = insertelement <128 x i8> %31, i8 %32, i32 16 - %34 = extractelement <16 x i8> %in, i64 1 - %35 = insertelement <128 x i8> %33, i8 %34, i32 17 - %36 = extractelement <16 x i8> %in, i64 2 - %37 = insertelement <128 x i8> %35, i8 %36, i32 18 - %38 = extractelement <16 x i8> %in, i64 3 - %39 = insertelement <128 x i8> %37, i8 %38, i32 19 - %40 = extractelement <16 x i8> %in, i64 4 - %41 = insertelement <128 x i8> %39, i8 %40, i32 20 - %42 = extractelement <16 x i8> %in, i64 5 - %43 = insertelement <128 x i8> %41, i8 %42, i32 21 - %44 = extractelement <16 x i8> %in, i64 6 - %45 = insertelement <128 x i8> %43, i8 %44, i32 22 - %46 = extractelement <16 x i8> %in, i64 7 - %47 = insertelement <128 x i8> %45, i8 %46, i32 23 - %48 = extractelement <16 x i8> %in, i64 8 - %49 = insertelement <128 x i8> %47, i8 %48, i32 24 - %50 = extractelement <16 x i8> %in, i64 9 - %51 = insertelement <128 x i8> %49, i8 %50, i32 25 - %52 = extractelement <16 x i8> %in, i64 10 - %53 = insertelement <128 x i8> %51, i8 %52, i32 26 - %54 = extractelement <16 x i8> %in, i64 11 - %55 = insertelement <128 x i8> %53, i8 %54, i32 27 - %56 = extractelement <16 x i8> %in, i64 12 - %57 = insertelement <128 x i8> %55, i8 %56, i32 28 - %58 = extractelement <16 x i8> %in, i64 13 - %59 = insertelement <128 x i8> %57, i8 %58, i32 29 - %60 = extractelement <16 x i8> %in, i64 14 - %61 = insertelement <128 x i8> %59, i8 %60, i32 30 - %62 = extractelement <16 x i8> %in, i64 15 - %63 = insertelement <128 x i8> %61, i8 %62, i32 31 - %64 = extractelement <16 x i8> %in, i64 0 - %65 = insertelement <128 x i8> %63, i8 %64, i32 32 - %66 = extractelement <16 x i8> %in, i64 1 - %67 = insertelement <128 x i8> %65, i8 %66, i32 33 - %68 = extractelement <16 x i8> %in, i64 2 - %69 = insertelement <128 x i8> %67, i8 %68, i32 34 - %70 = extractelement <16 x i8> %in, i64 3 - %71 = insertelement <128 x i8> %69, i8 %70, i32 35 - %72 = extractelement <16 x i8> %in, i64 4 - %73 = insertelement <128 x i8> %71, i8 %72, i32 36 - %74 = extractelement <16 x i8> %in, i64 5 - %75 = insertelement <128 x i8> %73, i8 %74, i32 37 - %76 = extractelement <16 x i8> %in, i64 6 - %77 = insertelement <128 x i8> %75, i8 %76, i32 38 - %78 = extractelement <16 x i8> %in, i64 7 - %79 = insertelement <128 x i8> %77, i8 %78, i32 39 - %80 = extractelement <16 x i8> %in, i64 8 - %81 = insertelement <128 x i8> %79, i8 %80, i32 40 - %82 = extractelement <16 x i8> %in, i64 9 - %83 = insertelement <128 x i8> %81, i8 %82, i32 41 - %84 = extractelement <16 x i8> %in, i64 10 - %85 = insertelement <128 x i8> %83, i8 %84, i32 42 - %86 = extractelement <16 x i8> %in, i64 11 - %87 = insertelement <128 x i8> %85, i8 %86, i32 43 - %88 = extractelement <16 x i8> %in, i64 12 - %89 = insertelement <128 x i8> %87, i8 %88, i32 44 - %90 = extractelement <16 x i8> %in, i64 13 - %91 = insertelement <128 x i8> %89, i8 %90, i32 45 - %92 = extractelement <16 x i8> %in, i64 14 - %93 = insertelement <128 x i8> %91, i8 %92, i32 46 - %94 = extractelement <16 x i8> %in, i64 15 - %95 = insertelement <128 x i8> %93, i8 %94, i32 47 - %96 = extractelement <16 x i8> %in, i64 0 - %97 = insertelement <128 x i8> %95, i8 %96, i32 48 - %98 = extractelement <16 x i8> %in, i64 1 - %99 = insertelement <128 x i8> %97, i8 %98, i32 49 - %100 = extractelement <16 x i8> %in, i64 2 - %101 = insertelement <128 x i8> %99, i8 %100, i32 50 - %102 = extractelement <16 x i8> %in, i64 3 - %103 = insertelement <128 x i8> %101, i8 %102, i32 51 - %104 = extractelement <16 x i8> %in, i64 4 - %105 = insertelement <128 x i8> %103, i8 %104, i32 52 - %106 = extractelement <16 x i8> %in, i64 5 - %107 = insertelement <128 x i8> %105, i8 %106, i32 53 - %108 = extractelement <16 x i8> %in, i64 6 - %109 = insertelement <128 x i8> %107, i8 %108, i32 54 - %110 = extractelement <16 x i8> %in, i64 7 - %111 = insertelement <128 x i8> %109, i8 %110, i32 55 - %112 = extractelement <16 x i8> %in, i64 8 - %113 = insertelement <128 x i8> %111, i8 %112, i32 56 - %114 = extractelement <16 x i8> %in, i64 9 - %115 = insertelement <128 x i8> %113, i8 %114, i32 57 - %116 = extractelement <16 x i8> %in, i64 10 - %117 = insertelement <128 x i8> %115, i8 %116, i32 58 - %118 = extractelement <16 x i8> %in, i64 11 - %119 = insertelement <128 x i8> %117, i8 %118, i32 59 - %120 = extractelement <16 x i8> %in, i64 12 - %121 = insertelement <128 x i8> %119, i8 %120, i32 60 - %122 = extractelement <16 x i8> %in, i64 13 - %123 = insertelement <128 x i8> %121, i8 %122, i32 61 - %124 = extractelement <16 x i8> %in, i64 14 - %125 = insertelement <128 x i8> %123, i8 %124, i32 62 - %126 = extractelement <16 x i8> %in, i64 15 - %127 = insertelement <128 x i8> %125, i8 %126, i32 63 - %128 = extractelement <16 x i8> %in, i64 0 - %129 = insertelement <128 x i8> %127, i8 %128, i32 64 - %130 = extractelement <16 x i8> %in, i64 1 - %131 = insertelement <128 x i8> %129, i8 %130, i32 65 - %132 = extractelement <16 x i8> %in, i64 2 - %133 = insertelement <128 x i8> %131, i8 %132, i32 66 - %134 = extractelement <16 x i8> %in, i64 3 - %135 = insertelement <128 x i8> %133, i8 %134, i32 67 - %136 = extractelement <16 x i8> %in, i64 4 - %137 = insertelement <128 x i8> %135, i8 %136, i32 68 - %138 = extractelement <16 x i8> %in, i64 5 - %139 = insertelement <128 x i8> %137, i8 %138, i32 69 - %140 = extractelement <16 x i8> %in, i64 6 - %141 = insertelement <128 x i8> %139, i8 %140, i32 70 - %142 = extractelement <16 x i8> %in, i64 7 - %143 = insertelement <128 x i8> %141, i8 %142, i32 71 - %144 = extractelement <16 x i8> %in, i64 8 - %145 = insertelement <128 x i8> %143, i8 %144, i32 72 - %146 = extractelement <16 x i8> %in, i64 9 - %147 = insertelement <128 x i8> %145, i8 %146, i32 73 - %148 = extractelement <16 x i8> %in, i64 10 - %149 = insertelement <128 x i8> %147, i8 %148, i32 74 - %150 = extractelement <16 x i8> %in, i64 11 - %151 = insertelement <128 x i8> %149, i8 %150, i32 75 - %152 = extractelement <16 x i8> %in, i64 12 - %153 = insertelement <128 x i8> %151, i8 %152, i32 76 - %154 = extractelement <16 x i8> %in, i64 13 - %155 = insertelement <128 x i8> %153, i8 %154, i32 77 - %156 = extractelement <16 x i8> %in, i64 14 - %157 = insertelement <128 x i8> %155, i8 %156, i32 78 - %158 = extractelement <16 x i8> %in, i64 15 - %159 = insertelement <128 x i8> %157, i8 %158, i32 79 - %160 = extractelement <16 x i8> %in, i64 0 - %161 = insertelement <128 x i8> %159, i8 %160, i32 80 - %162 = extractelement <16 x i8> %in, i64 1 - %163 = insertelement <128 x i8> %161, i8 %162, i32 81 - %164 = extractelement <16 x i8> %in, i64 2 - %165 = insertelement <128 x i8> %163, i8 %164, i32 82 - %166 = extractelement <16 x i8> %in, i64 3 - %167 = insertelement <128 x i8> %165, i8 %166, i32 83 - %168 = extractelement <16 x i8> %in, i64 4 - %169 = insertelement <128 x i8> %167, i8 %168, i32 84 - %170 = extractelement <16 x i8> %in, i64 5 - %171 = insertelement <128 x i8> %169, i8 %170, i32 85 - %172 = extractelement <16 x i8> %in, i64 6 - %173 = insertelement <128 x i8> %171, i8 %172, i32 86 - %174 = extractelement <16 x i8> %in, i64 7 - %175 = insertelement <128 x i8> %173, i8 %174, i32 87 - %176 = extractelement <16 x i8> %in, i64 8 - %177 = insertelement <128 x i8> %175, i8 %176, i32 88 - %178 = extractelement <16 x i8> %in, i64 9 - %179 = insertelement <128 x i8> %177, i8 %178, i32 89 - %180 = extractelement <16 x i8> %in, i64 10 - %181 = insertelement <128 x i8> %179, i8 %180, i32 90 - %182 = extractelement <16 x i8> %in, i64 11 - %183 = insertelement <128 x i8> %181, i8 %182, i32 91 - %184 = extractelement <16 x i8> %in, i64 12 - %185 = insertelement <128 x i8> %183, i8 %184, i32 92 - %186 = extractelement <16 x i8> %in, i64 13 - %187 = insertelement <128 x i8> %185, i8 %186, i32 93 - %188 = extractelement <16 x i8> %in, i64 14 - %189 = insertelement <128 x i8> %187, i8 %188, i32 94 - %190 = extractelement <16 x i8> %in, i64 15 - %191 = insertelement <128 x i8> %189, i8 %190, i32 95 - %192 = extractelement <16 x i8> %in, i64 0 - %193 = insertelement <128 x i8> %191, i8 %192, i32 96 - %194 = extractelement <16 x i8> %in, i64 1 - %195 = insertelement <128 x i8> %193, i8 %194, i32 97 - %196 = extractelement <16 x i8> %in, i64 2 - %197 = insertelement <128 x i8> %195, i8 %196, i32 98 - %198 = extractelement <16 x i8> %in, i64 3 - %199 = insertelement <128 x i8> %197, i8 %198, i32 99 - %200 = extractelement <16 x i8> %in, i64 4 - %201 = insertelement <128 x i8> %199, i8 %200, i32 100 - %202 = extractelement <16 x i8> %in, i64 5 - %203 = insertelement <128 x i8> %201, i8 %202, i32 101 - %204 = extractelement <16 x i8> %in, i64 6 - %205 = insertelement <128 x i8> %203, i8 %204, i32 102 - %206 = extractelement <16 x i8> %in, i64 7 - %207 = insertelement <128 x i8> %205, i8 %206, i32 103 - %208 = extractelement <16 x i8> %in, i64 8 - %209 = insertelement <128 x i8> %207, i8 %208, i32 104 - %210 = extractelement <16 x i8> %in, i64 9 - %211 = insertelement <128 x i8> %209, i8 %210, i32 105 - %212 = extractelement <16 x i8> %in, i64 10 - %213 = insertelement <128 x i8> %211, i8 %212, i32 106 - %214 = extractelement <16 x i8> %in, i64 11 - %215 = insertelement <128 x i8> %213, i8 %214, i32 107 - %216 = extractelement <16 x i8> %in, i64 12 - %217 = insertelement <128 x i8> %215, i8 %216, i32 108 - %218 = extractelement <16 x i8> %in, i64 13 - %219 = insertelement <128 x i8> %217, i8 %218, i32 109 - %220 = extractelement <16 x i8> %in, i64 14 - %221 = insertelement <128 x i8> %219, i8 %220, i32 110 - %222 = extractelement <16 x i8> %in, i64 15 - %223 = insertelement <128 x i8> %221, i8 %222, i32 111 - %224 = extractelement <16 x i8> %in, i64 0 - %225 = insertelement <128 x i8> %223, i8 %224, i32 112 - %226 = extractelement <16 x i8> %in, i64 1 - %227 = insertelement <128 x i8> %225, i8 %226, i32 113 - %228 = extractelement <16 x i8> %in, i64 2 - %229 = insertelement <128 x i8> %227, i8 %228, i32 114 - %230 = extractelement <16 x i8> %in, i64 3 - %231 = insertelement <128 x i8> %229, i8 %230, i32 115 - %232 = extractelement <16 x i8> %in, i64 4 - %233 = insertelement <128 x i8> %231, i8 %232, i32 116 - %234 = extractelement <16 x i8> %in, i64 5 - %235 = insertelement <128 x i8> %233, i8 %234, i32 117 - %236 = extractelement <16 x i8> %in, i64 6 - %237 = insertelement <128 x i8> %235, i8 %236, i32 118 - %238 = extractelement <16 x i8> %in, i64 7 - %239 = insertelement <128 x i8> %237, i8 %238, i32 119 - %240 = extractelement <16 x i8> %in, i64 8 - %241 = insertelement <128 x i8> %239, i8 %240, i32 120 - %242 = extractelement <16 x i8> %in, i64 9 - %243 = insertelement <128 x i8> %241, i8 %242, i32 121 - %244 = extractelement <16 x i8> %in, i64 10 - %245 = insertelement <128 x i8> %243, i8 %244, i32 122 - %246 = extractelement <16 x i8> %in, i64 11 - %247 = insertelement <128 x i8> %245, i8 %246, i32 123 - %248 = extractelement <16 x i8> %in, i64 12 - %249 = insertelement <128 x i8> %247, i8 %248, i32 124 - %250 = extractelement <16 x i8> %in, i64 13 - %251 = insertelement <128 x i8> %249, i8 %250, i32 125 - %252 = extractelement <16 x i8> %in, i64 14 - %253 = insertelement <128 x i8> %251, i8 %252, i32 126 - %254 = extractelement <16 x i8> %in, i64 15 - %255 = insertelement <128 x i8> %253, i8 %254, i32 127 - %256 = insertelement <16 x i8> poison, i8 %160, i64 0 - %257 = insertelement <16 x i8> %256, i8 %162, i64 1 - %258 = insertelement <16 x i8> %257, i8 %164, i64 2 - %259 = insertelement <16 x i8> %258, i8 %166, i64 3 - %260 = insertelement <16 x i8> %259, i8 %168, i64 4 - %261 = insertelement <16 x i8> %260, i8 %170, i64 5 - %262 = insertelement <16 x i8> %261, i8 %172, i64 6 - %263 = insertelement <16 x i8> %262, i8 %174, i64 7 - %264 = insertelement <16 x i8> %263, i8 %176, i64 8 - %265 = insertelement <16 x i8> %264, i8 %178, i64 9 - %266 = insertelement <16 x i8> %265, i8 %180, i64 10 - %267 = insertelement <16 x i8> %266, i8 %182, i64 11 - %268 = insertelement <16 x i8> %267, i8 %184, i64 12 - %269 = insertelement <16 x i8> %268, i8 %186, i64 13 - %270 = insertelement <16 x i8> %269, i8 %188, i64 14 - %271 = insertelement <16 x i8> %270, i8 %190, i64 15 - %sum = add <16 x i8> %271, %add + %i160 = extractelement <16 x i8> %in, i64 0 + %i162 = extractelement <16 x i8> %in, i64 1 + %i164 = extractelement <16 x i8> %in, i64 2 + %i166 = extractelement <16 x i8> %in, i64 3 + %i168 = extractelement <16 x i8> %in, i64 4 + %i170 = extractelement <16 x i8> %in, i64 5 + %i172 = extractelement <16 x i8> %in, i64 6 + %i174 = extractelement <16 x i8> %in, i64 7 + %i176 = extractelement <16 x i8> %in, i64 8 + %i178 = extractelement <16 x i8> %in, i64 9 + %i180 = extractelement <16 x i8> %in, i64 10 + %i182 = extractelement <16 x i8> %in, i64 11 + %i184 = extractelement <16 x i8> %in, i64 12 + %i186 = extractelement <16 x i8> %in, i64 13 + %i188 = extractelement <16 x i8> %in, i64 14 + %i190 = extractelement <16 x i8> %in, i64 15 + %i256 = insertelement <16 x i8> poison, i8 %i160, i64 0 + %i257 = insertelement <16 x i8> %i256, i8 %i162, i64 1 + %i258 = insertelement <16 x i8> %i257, i8 %i164, i64 2 + %i259 = insertelement <16 x i8> %i258, i8 %i166, i64 3 + %i260 = insertelement <16 x i8> %i259, i8 %i168, i64 4 + %i261 = insertelement <16 x i8> %i260, i8 %i170, i64 5 + %i262 = insertelement <16 x i8> %i261, i8 %i172, i64 6 + %i263 = insertelement <16 x i8> %i262, i8 %i174, i64 7 + %i264 = insertelement <16 x i8> %i263, i8 %i176, i64 8 + %i265 = insertelement <16 x i8> %i264, i8 %i178, i64 9 + %i266 = insertelement <16 x i8> %i265, i8 %i180, i64 10 + %i267 = insertelement <16 x i8> %i266, i8 %i182, i64 11 + %i268 = insertelement <16 x i8> %i267, i8 %i184, i64 12 + %i269 = insertelement <16 x i8> %i268, i8 %i186, i64 13 + %i270 = insertelement <16 x i8> %i269, i8 %i188, i64 14 + %i271 = insertelement <16 x i8> %i270, i8 %i190, i64 15 + %sum = add <16 x i8> %i271, %add store <16 x i8> %sum, ptr addrspace(3) %out, align 16 ret void } diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll new file mode 100644 index 0000000000000..f847a90e91e9e --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx1250 -passes=vector-combine < %s | FileCheck -check-prefix=OPT %s + +define <32 x i8> @extract_insert_chain(<8 x i8> %in0, <8 x i8> %in1, <8 x i8> %in2, <8 x i8> %in3) { +; OPT-LABEL: define <32 x i8> @extract_insert_chain( +; OPT-SAME: <8 x i8> [[IN0:%.*]], <8 x i8> [[IN1:%.*]], <8 x i8> [[IN2:%.*]], <8 x i8> [[IN3:%.*]]) #[[ATTR0:[0-9]+]] { +; OPT-NEXT: [[ENTRY:.*:]] +; OPT-NEXT: [[I_0_0:%.*]] = extractelement <8 x i8> [[IN0]], i64 0 +; OPT-NEXT: [[I_0_1:%.*]] = extractelement <8 x i8> [[IN0]], i64 1 +; OPT-NEXT: [[I_0_2:%.*]] = extractelement <8 x i8> [[IN0]], i64 2 +; OPT-NEXT: [[I_0_3:%.*]] = extractelement <8 x i8> [[IN0]], i64 3 +; OPT-NEXT: [[I_0_4:%.*]] = extractelement <8 x i8> [[IN0]], i64 4 +; OPT-NEXT: [[I_0_5:%.*]] = extractelement <8 x i8> [[IN0]], i64 5 +; OPT-NEXT: [[I_0_6:%.*]] = extractelement <8 x i8> [[IN0]], i64 6 +; OPT-NEXT: [[I_0_7:%.*]] = extractelement <8 x i8> [[IN0]], i64 7 +; OPT-NEXT: [[I_1_1:%.*]] = extractelement <8 x i8> [[IN1]], i64 1 +; OPT-NEXT: [[I_1_2:%.*]] = extractelement <8 x i8> [[IN1]], i64 2 +; OPT-NEXT: [[I_1_3:%.*]] = extractelement <8 x i8> [[IN1]], i64 3 +; OPT-NEXT: [[I_1_4:%.*]] = extractelement <8 x i8> [[IN1]], i64 4 +; OPT-NEXT: [[I_1_5:%.*]] = extractelement <8 x i8> [[IN1]], i64 5 +; OPT-NEXT: [[I_1_6:%.*]] = extractelement <8 x i8> [[IN1]], i64 6 +; OPT-NEXT: [[I_1_7:%.*]] = extractelement <8 x i8> [[IN1]], i64 7 +; OPT-NEXT: [[I_2_1:%.*]] = extractelement <8 x i8> [[IN2]], i64 1 +; OPT-NEXT: [[I_2_2:%.*]] = extractelement <8 x i8> [[IN2]], i64 2 +; OPT-NEXT: [[I_2_3:%.*]] = extractelement <8 x i8> [[IN2]], i64 3 +; OPT-NEXT: [[I_2_4:%.*]] = extractelement <8 x i8> [[IN2]], i64 4 +; OPT-NEXT: [[I_2_5:%.*]] = extractelement <8 x i8> [[IN2]], i64 5 +; OPT-NEXT: [[I_2_6:%.*]] = extractelement <8 x i8> [[IN2]], i64 6 +; OPT-NEXT: [[I_2_7:%.*]] = extractelement <8 x i8> [[IN2]], i64 7 +; OPT-NEXT: [[I_3_1:%.*]] = extractelement <8 x i8> [[IN3]], i64 1 +; OPT-NEXT: [[I_3_2:%.*]] = extractelement <8 x i8> [[IN3]], i64 2 +; OPT-NEXT: [[I_3_3:%.*]] = extractelement <8 x i8> [[IN3]], i64 3 +; OPT-NEXT: [[I_3_4:%.*]] = extractelement <8 x i8> [[IN3]], i64 4 +; OPT-NEXT: [[I_3_5:%.*]] = extractelement <8 x i8> [[IN3]], i64 5 +; OPT-NEXT: [[I_3_6:%.*]] = extractelement <8 x i8> [[IN3]], i64 6 +; OPT-NEXT: [[I_3_7:%.*]] = extractelement <8 x i8> [[IN3]], i64 7 +; OPT-NEXT: [[O_0_0:%.*]] = insertelement <32 x i8> poison, i8 [[I_0_0]], i32 0 +; OPT-NEXT: [[O_0_1:%.*]] = insertelement <32 x i8> [[O_0_0]], i8 [[I_0_1]], i32 1 +; OPT-NEXT: [[O_0_2:%.*]] = insertelement <32 x i8> [[O_0_1]], i8 [[I_0_2]], i32 2 +; OPT-NEXT: [[O_0_3:%.*]] = insertelement <32 x i8> [[O_0_2]], i8 [[I_0_3]], i32 3 +; OPT-NEXT: [[O_0_4:%.*]] = insertelement <32 x i8> [[O_0_3]], i8 [[I_0_4]], i32 4 +; OPT-NEXT: [[O_0_5:%.*]] = insertelement <32 x i8> [[O_0_4]], i8 [[I_0_5]], i32 5 +; OPT-NEXT: [[O_0_6:%.*]] = insertelement <32 x i8> [[O_0_5]], i8 [[I_0_6]], i32 6 +; OPT-NEXT: [[O_0_7:%.*]] = insertelement <32 x i8> [[O_0_6]], i8 [[I_0_7]], i32 7 +; OPT-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[IN1]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_1_0:%.*]] = shufflevector <32 x i8> [[O_0_7]], <32 x i8> [[TMP0]], <32 x i32> +; OPT-NEXT: [[O_1_1:%.*]] = insertelement <32 x i8> [[O_1_0]], i8 [[I_1_1]], i32 9 +; OPT-NEXT: [[O_1_2:%.*]] = insertelement <32 x i8> [[O_1_1]], i8 [[I_1_2]], i32 10 +; OPT-NEXT: [[O_1_3:%.*]] = insertelement <32 x i8> [[O_1_2]], i8 [[I_1_3]], i32 11 +; OPT-NEXT: [[O_1_4:%.*]] = insertelement <32 x i8> [[O_1_3]], i8 [[I_1_4]], i32 12 +; OPT-NEXT: [[O_1_5:%.*]] = insertelement <32 x i8> [[O_1_4]], i8 [[I_1_5]], i32 13 +; OPT-NEXT: [[O_1_6:%.*]] = insertelement <32 x i8> [[O_1_5]], i8 [[I_1_6]], i32 14 +; OPT-NEXT: [[O_1_7:%.*]] = insertelement <32 x i8> [[O_1_6]], i8 [[I_1_7]], i32 15 +; OPT-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_2_0:%.*]] = shufflevector <32 x i8> [[O_1_7]], <32 x i8> [[TMP1]], <32 x i32> +; OPT-NEXT: [[O_2_1:%.*]] = insertelement <32 x i8> [[O_2_0]], i8 [[I_2_1]], i32 17 +; OPT-NEXT: [[O_2_2:%.*]] = insertelement <32 x i8> [[O_2_1]], i8 [[I_2_2]], i32 18 +; OPT-NEXT: [[O_2_3:%.*]] = insertelement <32 x i8> [[O_2_2]], i8 [[I_2_3]], i32 19 +; OPT-NEXT: [[O_2_4:%.*]] = insertelement <32 x i8> [[O_2_3]], i8 [[I_2_4]], i32 20 +; OPT-NEXT: [[O_2_5:%.*]] = insertelement <32 x i8> [[O_2_4]], i8 [[I_2_5]], i32 21 +; OPT-NEXT: [[O_2_6:%.*]] = insertelement <32 x i8> [[O_2_5]], i8 [[I_2_6]], i32 22 +; OPT-NEXT: [[O_2_7:%.*]] = insertelement <32 x i8> [[O_2_6]], i8 [[I_2_7]], i32 23 +; OPT-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_3_0:%.*]] = shufflevector <32 x i8> [[O_2_7]], <32 x i8> [[TMP2]], <32 x i32> +; OPT-NEXT: [[O_3_1:%.*]] = insertelement <32 x i8> [[O_3_0]], i8 [[I_3_1]], i32 25 +; OPT-NEXT: [[O_3_2:%.*]] = insertelement <32 x i8> [[O_3_1]], i8 [[I_3_2]], i32 26 +; OPT-NEXT: [[O_3_3:%.*]] = insertelement <32 x i8> [[O_3_2]], i8 [[I_3_3]], i32 27 +; OPT-NEXT: [[O_3_4:%.*]] = insertelement <32 x i8> [[O_3_3]], i8 [[I_3_4]], i32 28 +; OPT-NEXT: [[O_3_5:%.*]] = insertelement <32 x i8> [[O_3_4]], i8 [[I_3_5]], i32 29 +; OPT-NEXT: [[O_3_6:%.*]] = insertelement <32 x i8> [[O_3_5]], i8 [[I_3_6]], i32 30 +; OPT-NEXT: [[O_3_7:%.*]] = insertelement <32 x i8> [[O_3_6]], i8 [[I_3_7]], i32 31 +; OPT-NEXT: ret <32 x i8> [[O_3_7]] +; +entry: + %i.0.0 = extractelement <8 x i8> %in0, i64 0 + %i.0.1 = extractelement <8 x i8> %in0, i64 1 + %i.0.2 = extractelement <8 x i8> %in0, i64 2 + %i.0.3 = extractelement <8 x i8> %in0, i64 3 + %i.0.4 = extractelement <8 x i8> %in0, i64 4 + %i.0.5 = extractelement <8 x i8> %in0, i64 5 + %i.0.6 = extractelement <8 x i8> %in0, i64 6 + %i.0.7 = extractelement <8 x i8> %in0, i64 7 + %i.1.0 = extractelement <8 x i8> %in1, i64 0 + %i.1.1 = extractelement <8 x i8> %in1, i64 1 + %i.1.2 = extractelement <8 x i8> %in1, i64 2 + %i.1.3 = extractelement <8 x i8> %in1, i64 3 + %i.1.4 = extractelement <8 x i8> %in1, i64 4 + %i.1.5 = extractelement <8 x i8> %in1, i64 5 + %i.1.6 = extractelement <8 x i8> %in1, i64 6 + %i.1.7 = extractelement <8 x i8> %in1, i64 7 + %i.2.0 = extractelement <8 x i8> %in2, i64 0 + %i.2.1 = extractelement <8 x i8> %in2, i64 1 + %i.2.2 = extractelement <8 x i8> %in2, i64 2 + %i.2.3 = extractelement <8 x i8> %in2, i64 3 + %i.2.4 = extractelement <8 x i8> %in2, i64 4 + %i.2.5 = extractelement <8 x i8> %in2, i64 5 + %i.2.6 = extractelement <8 x i8> %in2, i64 6 + %i.2.7 = extractelement <8 x i8> %in2, i64 7 + %i.3.0 = extractelement <8 x i8> %in3, i64 0 + %i.3.1 = extractelement <8 x i8> %in3, i64 1 + %i.3.2 = extractelement <8 x i8> %in3, i64 2 + %i.3.3 = extractelement <8 x i8> %in3, i64 3 + %i.3.4 = extractelement <8 x i8> %in3, i64 4 + %i.3.5 = extractelement <8 x i8> %in3, i64 5 + %i.3.6 = extractelement <8 x i8> %in3, i64 6 + %i.3.7 = extractelement <8 x i8> %in3, i64 7 + + %o.0.0 = insertelement <32 x i8> poison, i8 %i.0.0, i32 0 + %o.0.1 = insertelement <32 x i8> %o.0.0, i8 %i.0.1, i32 1 + %o.0.2 = insertelement <32 x i8> %o.0.1, i8 %i.0.2, i32 2 + %o.0.3 = insertelement <32 x i8> %o.0.2, i8 %i.0.3, i32 3 + %o.0.4 = insertelement <32 x i8> %o.0.3, i8 %i.0.4, i32 4 + %o.0.5 = insertelement <32 x i8> %o.0.4, i8 %i.0.5, i32 5 + %o.0.6 = insertelement <32 x i8> %o.0.5, i8 %i.0.6, i32 6 + %o.0.7 = insertelement <32 x i8> %o.0.6, i8 %i.0.7, i32 7 + + %o.1.0 = insertelement <32 x i8> %o.0.7, i8 %i.1.0, i32 8 + %o.1.1 = insertelement <32 x i8> %o.1.0, i8 %i.1.1, i32 9 + %o.1.2 = insertelement <32 x i8> %o.1.1, i8 %i.1.2, i32 10 + %o.1.3 = insertelement <32 x i8> %o.1.2, i8 %i.1.3, i32 11 + %o.1.4 = insertelement <32 x i8> %o.1.3, i8 %i.1.4, i32 12 + %o.1.5 = insertelement <32 x i8> %o.1.4, i8 %i.1.5, i32 13 + %o.1.6 = insertelement <32 x i8> %o.1.5, i8 %i.1.6, i32 14 + %o.1.7 = insertelement <32 x i8> %o.1.6, i8 %i.1.7, i32 15 + + %o.2.0 = insertelement <32 x i8> %o.1.7, i8 %i.2.0, i32 16 + %o.2.1 = insertelement <32 x i8> %o.2.0, i8 %i.2.1, i32 17 + %o.2.2 = insertelement <32 x i8> %o.2.1, i8 %i.2.2, i32 18 + %o.2.3 = insertelement <32 x i8> %o.2.2, i8 %i.2.3, i32 19 + %o.2.4 = insertelement <32 x i8> %o.2.3, i8 %i.2.4, i32 20 + %o.2.5 = insertelement <32 x i8> %o.2.4, i8 %i.2.5, i32 21 + %o.2.6 = insertelement <32 x i8> %o.2.5, i8 %i.2.6, i32 22 + %o.2.7 = insertelement <32 x i8> %o.2.6, i8 %i.2.7, i32 23 + + %o.3.0 = insertelement <32 x i8> %o.2.7, i8 %i.3.0, i32 24 + %o.3.1 = insertelement <32 x i8> %o.3.0, i8 %i.3.1, i32 25 + %o.3.2 = insertelement <32 x i8> %o.3.1, i8 %i.3.2, i32 26 + %o.3.3 = insertelement <32 x i8> %o.3.2, i8 %i.3.3, i32 27 + %o.3.4 = insertelement <32 x i8> %o.3.3, i8 %i.3.4, i32 28 + %o.3.5 = insertelement <32 x i8> %o.3.4, i8 %i.3.5, i32 29 + %o.3.6 = insertelement <32 x i8> %o.3.5, i8 %i.3.6, i32 30 + %o.3.7 = insertelement <32 x i8> %o.3.6, i8 %i.3.7, i32 31 + + ret <32 x i8> %o.3.7 +} + +define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) { +; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening( +; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17 +; OPT-NEXT: [[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19 +; OPT-NEXT: [[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21 +; OPT-NEXT: [[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23 +; OPT-NEXT: [[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> +; OPT-NEXT: [[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1 +; OPT-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> +; OPT-NEXT: [[O_2:%.*]] = shufflevector <8 x i8> [[O_1]], <8 x i8> [[TMP2]], <8 x i32> +; OPT-NEXT: [[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3 +; OPT-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> +; OPT-NEXT: [[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8> [[TMP1]], <8 x i32> +; OPT-NEXT: [[O_5:%.*]] = insertelement <8 x i8> [[O_4]], i8 [[I_5]], i32 5 +; OPT-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> +; OPT-NEXT: [[O_6:%.*]] = shufflevector <8 x i8> [[O_5]], <8 x i8> [[TMP3]], <8 x i32> +; OPT-NEXT: [[O_7:%.*]] = insertelement <8 x i8> [[O_6]], i8 [[I_7]], i32 7 +; OPT-NEXT: ret <8 x i8> [[O_7]] +; + %i.0 = extractelement <32 x i8> %in, i64 16 + %i.1 = extractelement <32 x i8> %in, i64 17 + %i.2 = extractelement <32 x i8> %in, i64 18 + %i.3 = extractelement <32 x i8> %in, i64 19 + %i.4 = extractelement <32 x i8> %in, i64 20 + %i.5 = extractelement <32 x i8> %in, i64 21 + %i.6 = extractelement <32 x i8> %in, i64 22 + %i.7 = extractelement <32 x i8> %in, i64 23 + + %o.0 = insertelement <8 x i8> poison, i8 %i.0, i32 0 + %o.1 = insertelement <8 x i8> %o.0, i8 %i.1, i32 1 + %o.2 = insertelement <8 x i8> %o.1, i8 %i.2, i32 2 + %o.3 = insertelement <8 x i8> %o.2, i8 %i.3, i32 3 + %o.4 = insertelement <8 x i8> %o.3, i8 %i.4, i32 4 + %o.5 = insertelement <8 x i8> %o.4, i8 %i.5, i32 5 + %o.6 = insertelement <8 x i8> %o.5, i8 %i.6, i32 6 + %o.7 = insertelement <8 x i8> %o.6, i8 %i.7, i32 7 + + ret <8 x i8> %o.7 +}