@@ -1029,144 +1029,46 @@ define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
10291029define i32 @reset_multiload_i128 (ptr %word , i32 %position , ptr %p ) nounwind {
10301030; X86-LABEL: reset_multiload_i128:
10311031; X86: # %bb.0:
1032- ; X86-NEXT: pushl %ebp
1033- ; X86-NEXT: movl %esp, %ebp
10341032; X86-NEXT: pushl %ebx
10351033; X86-NEXT: pushl %edi
10361034; X86-NEXT: pushl %esi
1037- ; X86-NEXT: andl $-16, %esp
1038- ; X86-NEXT: subl $64, %esp
1039- ; X86-NEXT: movl 12(%ebp), %ecx
1040- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1041- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1042- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1043- ; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
1044- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1045- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1046- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1047- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1048- ; X86-NEXT: movl %ecx, %eax
1049- ; X86-NEXT: shrb $3, %al
1050- ; X86-NEXT: andb $12, %al
1051- ; X86-NEXT: negb %al
1052- ; X86-NEXT: movsbl %al, %eax
1053- ; X86-NEXT: movl 40(%esp,%eax), %edx
1054- ; X86-NEXT: movl 44(%esp,%eax), %esi
1055- ; X86-NEXT: shldl %cl, %edx, %esi
1056- ; X86-NEXT: movl 32(%esp,%eax), %edi
1057- ; X86-NEXT: movl 36(%esp,%eax), %ebx
1058- ; X86-NEXT: shldl %cl, %ebx, %edx
1059- ; X86-NEXT: shldl %cl, %edi, %ebx
1060- ; X86-NEXT: notl %ebx
1061- ; X86-NEXT: movl 16(%ebp), %eax
1035+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1036+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1037+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
10621038; X86-NEXT: movl (%eax), %eax
1063- ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1064- ; X86-NEXT: movl 8(%ebp), %eax
1065- ; X86-NEXT: andl %ebx, 4(%eax)
1066- ; X86-NEXT: shll %cl, %edi
1067- ; X86-NEXT: notl %edi
1068- ; X86-NEXT: movl %ecx, %ebx
1069- ; X86-NEXT: andl $96, %ebx
1070- ; X86-NEXT: shrl $3, %ebx
1071- ; X86-NEXT: movl (%eax,%ebx), %ebx
1072- ; X86-NEXT: andl %edi, (%eax)
1073- ; X86-NEXT: notl %esi
1074- ; X86-NEXT: andl %esi, 12(%eax)
1075- ; X86-NEXT: notl %edx
1076- ; X86-NEXT: andl %edx, 8(%eax)
1077- ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1078- ; X86-NEXT: btl %ecx, %ebx
1039+ ; X86-NEXT: movl %edx, %esi
1040+ ; X86-NEXT: andl $96, %esi
1041+ ; X86-NEXT: shrl $3, %esi
1042+ ; X86-NEXT: movl (%ecx,%esi), %edi
1043+ ; X86-NEXT: movl %edi, %ebx
1044+ ; X86-NEXT: btrl %edx, %ebx
1045+ ; X86-NEXT: btl %edx, %edi
1046+ ; X86-NEXT: movl %ebx, (%ecx,%esi)
10791047; X86-NEXT: jae .LBB22_2
10801048; X86-NEXT: # %bb.1:
10811049; X86-NEXT: xorl %eax, %eax
10821050; X86-NEXT: .LBB22_2:
1083- ; X86-NEXT: leal -12(%ebp), %esp
10841051; X86-NEXT: popl %esi
10851052; X86-NEXT: popl %edi
10861053; X86-NEXT: popl %ebx
1087- ; X86-NEXT: popl %ebp
10881054; X86-NEXT: retl
10891055;
1090- ; SSE-LABEL: reset_multiload_i128:
1091- ; SSE: # %bb.0:
1092- ; SSE-NEXT: movl %esi, %ecx
1093- ; SSE-NEXT: movl $1, %esi
1094- ; SSE-NEXT: xorl %r8d, %r8d
1095- ; SSE-NEXT: shldq %cl, %rsi, %r8
1096- ; SSE-NEXT: xorl %eax, %eax
1097- ; SSE-NEXT: shlq %cl, %rsi
1098- ; SSE-NEXT: testb $64, %cl
1099- ; SSE-NEXT: cmovneq %rsi, %r8
1100- ; SSE-NEXT: cmovneq %rax, %rsi
1101- ; SSE-NEXT: notq %r8
1102- ; SSE-NEXT: notq %rsi
1103- ; SSE-NEXT: movl %ecx, %r9d
1104- ; SSE-NEXT: andl $96, %r9d
1105- ; SSE-NEXT: shrl $3, %r9d
1106- ; SSE-NEXT: movl (%rdi,%r9), %r9d
1107- ; SSE-NEXT: btl %ecx, %r9d
1108- ; SSE-NEXT: jb .LBB22_2
1109- ; SSE-NEXT: # %bb.1:
1110- ; SSE-NEXT: movl (%rdx), %eax
1111- ; SSE-NEXT: .LBB22_2:
1112- ; SSE-NEXT: andq %rsi, (%rdi)
1113- ; SSE-NEXT: andq %r8, 8(%rdi)
1114- ; SSE-NEXT: # kill: def $eax killed $eax killed $rax
1115- ; SSE-NEXT: retq
1116- ;
1117- ; AVX2-LABEL: reset_multiload_i128:
1118- ; AVX2: # %bb.0:
1119- ; AVX2-NEXT: movl %esi, %ecx
1120- ; AVX2-NEXT: xorl %eax, %eax
1121- ; AVX2-NEXT: movl $1, %r8d
1122- ; AVX2-NEXT: xorl %esi, %esi
1123- ; AVX2-NEXT: shldq %cl, %r8, %rsi
1124- ; AVX2-NEXT: shlxq %rcx, %r8, %r8
1125- ; AVX2-NEXT: testb $64, %cl
1126- ; AVX2-NEXT: cmovneq %r8, %rsi
1127- ; AVX2-NEXT: cmovneq %rax, %r8
1128- ; AVX2-NEXT: notq %rsi
1129- ; AVX2-NEXT: notq %r8
1130- ; AVX2-NEXT: movl %ecx, %r9d
1131- ; AVX2-NEXT: andl $96, %r9d
1132- ; AVX2-NEXT: shrl $3, %r9d
1133- ; AVX2-NEXT: movl (%rdi,%r9), %r9d
1134- ; AVX2-NEXT: btl %ecx, %r9d
1135- ; AVX2-NEXT: jb .LBB22_2
1136- ; AVX2-NEXT: # %bb.1:
1137- ; AVX2-NEXT: movl (%rdx), %eax
1138- ; AVX2-NEXT: .LBB22_2:
1139- ; AVX2-NEXT: andq %r8, (%rdi)
1140- ; AVX2-NEXT: andq %rsi, 8(%rdi)
1141- ; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
1142- ; AVX2-NEXT: retq
1143- ;
1144- ; AVX512-LABEL: reset_multiload_i128:
1145- ; AVX512: # %bb.0:
1146- ; AVX512-NEXT: movl %esi, %ecx
1147- ; AVX512-NEXT: movl $1, %r8d
1148- ; AVX512-NEXT: xorl %esi, %esi
1149- ; AVX512-NEXT: shldq %cl, %r8, %rsi
1150- ; AVX512-NEXT: xorl %eax, %eax
1151- ; AVX512-NEXT: shlxq %rcx, %r8, %r8
1152- ; AVX512-NEXT: testb $64, %cl
1153- ; AVX512-NEXT: cmovneq %r8, %rsi
1154- ; AVX512-NEXT: cmovneq %rax, %r8
1155- ; AVX512-NEXT: notq %rsi
1156- ; AVX512-NEXT: notq %r8
1157- ; AVX512-NEXT: movl %ecx, %r9d
1158- ; AVX512-NEXT: andl $96, %r9d
1159- ; AVX512-NEXT: shrl $3, %r9d
1160- ; AVX512-NEXT: movl (%rdi,%r9), %r9d
1161- ; AVX512-NEXT: btl %ecx, %r9d
1162- ; AVX512-NEXT: jb .LBB22_2
1163- ; AVX512-NEXT: # %bb.1:
1164- ; AVX512-NEXT: movl (%rdx), %eax
1165- ; AVX512-NEXT: .LBB22_2:
1166- ; AVX512-NEXT: andq %r8, (%rdi)
1167- ; AVX512-NEXT: andq %rsi, 8(%rdi)
1168- ; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
1169- ; AVX512-NEXT: retq
1056+ ; X64-LABEL: reset_multiload_i128:
1057+ ; X64: # %bb.0:
1058+ ; X64-NEXT: movl %esi, %ecx
1059+ ; X64-NEXT: andl $96, %ecx
1060+ ; X64-NEXT: shrl $3, %ecx
1061+ ; X64-NEXT: movl (%rdi,%rcx), %r9d
1062+ ; X64-NEXT: movl %r9d, %r8d
1063+ ; X64-NEXT: btrl %esi, %r8d
1064+ ; X64-NEXT: xorl %eax, %eax
1065+ ; X64-NEXT: btl %esi, %r9d
1066+ ; X64-NEXT: jb .LBB22_2
1067+ ; X64-NEXT: # %bb.1:
1068+ ; X64-NEXT: movl (%rdx), %eax
1069+ ; X64-NEXT: .LBB22_2:
1070+ ; X64-NEXT: movl %r8d, (%rdi,%rcx)
1071+ ; X64-NEXT: retq
11701072 %rem = and i32 %position , 127
11711073 %ofs = zext nneg i32 %rem to i128
11721074 %bit = shl nuw i128 1 , %ofs
0 commit comments