@@ -221,6 +221,61 @@ func.func @scaled_ext_scalar_f4e2m1_bf16(%v: vector<2xf4E2M1FN>, %scale: f32) ->
221
221
func.return %ret : vector <2 xbf16 >
222
222
}
223
223
224
// Exercises amdgpu.scaled_ext_packed8 on a vector<8xf4E2M1FN> source, once per
// result element type used here (f16, bf16, f32). Every op reuses the same
// source vector and f32 scale. The directives inside the body only require the
// op mnemonic to appear, in order, after the label.
// NOTE(review): the bracketed `0` after %scale is presumably a scale-selection
// index -- confirm against the op definition.
// CHECK-LABEL: func.func @scaled_ext_packed8_fp4
func.func @scaled_ext_packed8_fp4(%v: vector<8xf4E2M1FN>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
  // CHECK: amdgpu.scaled_ext_packed8
  %ret0 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xf16>
  // CHECK: amdgpu.scaled_ext_packed8
  %ret1 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xbf16>
  // CHECK: amdgpu.scaled_ext_packed8
  %ret2 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xf32>
  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
}
234
+
235
// Exercises amdgpu.scaled_ext_packed8 on a vector<8xf8E4M3FN> source, once per
// result element type used here (f16, bf16, f32). Every op reuses the same
// source vector and f32 scale. The directives inside the body only require the
// op mnemonic to appear, in order, after the label.
// NOTE(review): the bracketed `0` after %scale is presumably a scale-selection
// index -- confirm against the op definition.
// CHECK-LABEL: func.func @scaled_ext_packed8_fp8
func.func @scaled_ext_packed8_fp8(%v: vector<8xf8E4M3FN>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
  // CHECK: amdgpu.scaled_ext_packed8
  %ret0 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xf16>
  // CHECK: amdgpu.scaled_ext_packed8
  %ret1 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xbf16>
  // CHECK: amdgpu.scaled_ext_packed8
  %ret2 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xf32>
  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
}
245
+
246
// Exercises amdgpu.scaled_ext_packed8 on a vector<8xf8E5M2> source, once per
// result element type used here (f16, bf16, f32). Every op reuses the same
// source vector and f32 scale. The directives inside the body only require the
// op mnemonic to appear, in order, after the label.
// NOTE(review): the bracketed `0` after %scale is presumably a scale-selection
// index -- confirm against the op definition.
// CHECK-LABEL: func.func @scaled_ext_packed8_bf8
func.func @scaled_ext_packed8_bf8(%v: vector<8xf8E5M2>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
  // CHECK: amdgpu.scaled_ext_packed8
  %ret0 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E5M2> to vector<8xf16>
  // CHECK: amdgpu.scaled_ext_packed8
  %ret1 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E5M2> to vector<8xbf16>
  // CHECK: amdgpu.scaled_ext_packed8
  %ret2 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E5M2> to vector<8xf32>
  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
}
256
+
257
// Exercises amdgpu.scaled_ext_packed16 on a vector<16xf6E2M3FN> source, once
// per result element type used here (f16, bf16, f32). Every op reuses the same
// source vector and f32 scale. The directives inside the body only require the
// op mnemonic to appear, in order, after the label.
// NOTE(review): the bracketed `0` after %scale is presumably a scale-selection
// index -- confirm against the op definition.
// CHECK-LABEL: func.func @scaled_ext_packed16_fp6
func.func @scaled_ext_packed16_fp6(%v: vector<16xf6E2M3FN>, %scale: f32) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
  // CHECK: amdgpu.scaled_ext_packed16
  %ret0 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xf16>
  // CHECK: amdgpu.scaled_ext_packed16
  %ret1 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xbf16>
  // CHECK: amdgpu.scaled_ext_packed16
  %ret2 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xf32>
  func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
}
267
+
268
// Exercises amdgpu.scaled_ext_packed16 on a vector<16xf6E3M2FN> source, once
// per result element type used here (f16, bf16, f32). Every op reuses the same
// source vector and f32 scale. The directives inside the body only require the
// op mnemonic to appear, in order, after the label.
// NOTE(review): the `_bf16` suffix does not match the source element type,
// which is the 6-bit f6E3M2FN; `_bf6` would parallel the `_fp6` test for
// f6E2M3FN. The symbol is left unchanged here so the label and the function
// stay in sync -- rename both together if desired.
// NOTE(review): the bracketed `0` after %scale is presumably a scale-selection
// index -- confirm against the op definition.
// CHECK-LABEL: func.func @scaled_ext_packed16_bf16
func.func @scaled_ext_packed16_bf16(%v: vector<16xf6E3M2FN>, %scale: f32) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
  // CHECK: amdgpu.scaled_ext_packed16
  %ret0 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xf16>
  // CHECK: amdgpu.scaled_ext_packed16
  %ret1 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xbf16>
  // CHECK: amdgpu.scaled_ext_packed16
  %ret2 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xf32>
  func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
}
278
+
224
279
// CHECK-LABEL: func.func @packed_scaled_trunc_f8e4m3_f32
225
280
// CHECK: amdgpu.packed_scaled_trunc
226
281
func.func @packed_scaled_trunc_f8e4m3_f32 (%v: vector <2 xf32 >, %scale: f32 ) -> vector <4 xf8 E4 M3 FN> {
0 commit comments