Skip to content

[AArch64] Using an undefined physical register after Load/store optimizer #164230

@davemgreen

Description

@davemgreen

From @sjoerdmeijer in #149380

I found another case or variant of this:

*** Bad machine code: Using an undefined physical register ***
- function:    _Z4testsyssysytyyyttyyxxiPA13_tPtPA17_aPA25_yS1_PA25_sPyPA25_A25_xPA25_tPA25_S4_S5_PaPA25_A25_a
- basic block: %bb.37 vector.body912 (0xaaaab0406d88)
- instruction: $q1 = ORRv16i8 $q0, killed $q0
- operand 2:   killed $q0

Compiler Explorer seems to be down, so I will post a link later, but compiling this with `llc -verify-machineinstrs` should be enough to reproduce:

; Reproducer module for the machine-verifier failure quoted in this issue:
;   "Bad machine code: Using an undefined physical register"
;   on `$q1 = ORRv16i8 $q0, killed $q0` in %bb.37 (vector.body912).
; Per the report, compiling this file with `llc -verify-machineinstrs` should
; be enough to reproduce it.
; NOTE(review): the IR looks machine-reduced (null loads, values without
; users); presumably the exact instruction sequence matters for reproduction,
; so do not tidy it up -- confirm before changing anything.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"

; Single void function made of three blocks: `entry`, the self-looping
; `vector.body912`, and the exit block `middle.block926`. It carries
; attribute group #0 (defined at the bottom of the module).
define void @_Z4testsyssysytyyyttyyxxiPA13_tPtPA17_aPA25_yS1_PA25_sPyPA25_A25_xPA25_tPA25_S4_S5_PaPA25_A25_a(ptr %arr_3, ptr %arr_4, ptr %0, ptr %1, ptr %2, ptr %3, ptr %4, ptr %5, ptr %6, i8 %7, ptr %8, ptr %9, ptr %10, ptr %11, ptr %12, ptr %13, ptr %14, ptr %15, <16 x i8> %16, ptr %17, ptr %arr_5, ptr %18, i1 %19) #0 {
entry:
  br label %vector.body912

vector.body912:                                   ; preds = %vector.body912, %entry
  ; Loop-carried state: the i64 index plus four <16 x i8> accumulators, all
  ; starting at zero when entered from %entry.
  %index913 = phi i64 [ 0, %entry ], [ %index.next925, %vector.body912 ]
  %vec.phi914 = phi <16 x i8> [ zeroinitializer, %entry ], [ %61, %vector.body912 ]
  %vec.phi915 = phi <16 x i8> [ zeroinitializer, %entry ], [ %62, %vector.body912 ]
  %vec.phi916 = phi <16 x i8> [ zeroinitializer, %entry ], [ %63, %vector.body912 ]
  %vec.phi917 = phi <16 x i8> [ zeroinitializer, %entry ], [ %64, %vector.body912 ]
  ; Address computations derived from the index. %21 and %23 have no users in
  ; this function; %22 feeds the first <61 x i16> load below.
  %20 = shl i64 %index913, 1
  %offset.idx9181 = add i64 -38119, %20
  %21 = or i64 %offset.idx9181, 0
  %22 = getelementptr i16, ptr %arr_3, i64 %offset.idx9181
  %23 = getelementptr i16, ptr %arr_3, i64 %20
  ; Build three <16 x i8> vectors (%26, %30, %40) lane by lane from scalar i8
  ; loads (several through a null pointer), the %7 argument and the constant 1.
  %24 = load i8, ptr null, align 1
  %25 = insertelement <16 x i8> zeroinitializer, i8 %7, i64 10
  %26 = insertelement <16 x i8> %25, i8 %24, i64 11
  %27 = load i8, ptr null, align 1
  %28 = load i8, ptr null, align 1
  %29 = insertelement <16 x i8> zeroinitializer, i8 %27, i64 0
  %30 = insertelement <16 x i8> %29, i8 %28, i64 15
  %31 = load i8, ptr null, align 1
  %32 = load i8, ptr %4, align 1
  %33 = load i8, ptr null, align 1
  %34 = load i8, ptr %9, align 1
  %35 = insertelement <16 x i8> zeroinitializer, i8 %31, i64 7
  %36 = insertelement <16 x i8> %35, i8 1, i64 1
  %37 = insertelement <16 x i8> %36, i8 %32, i64 0
  %38 = insertelement <16 x i8> %37, i8 %7, i64 12
  %39 = insertelement <16 x i8> %38, i8 %33, i64 13
  %40 = insertelement <16 x i8> %39, i8 %34, i64 14
  ; Sign-extend the byte vectors (and the %16 argument) to <16 x i16>.
  %41 = sext <16 x i8> %26 to <16 x i16>
  %42 = sext <16 x i8> %16 to <16 x i16>
  %43 = sext <16 x i8> %30 to <16 x i16>
  %44 = sext <16 x i8> %40 to <16 x i16>
  ; Four <61 x i16> loads; each shufflevector keeps every fourth lane
  ; (indices 0, 4, ..., 60) to form a <16 x i16> vector.
  %45 = load <61 x i16>, ptr %22, align 2
  %strided.vec = shufflevector <61 x i16> %45, <61 x i16> zeroinitializer, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
  %46 = load <61 x i16>, ptr %2, align 2
  %strided.vec920 = shufflevector <61 x i16> %46, <61 x i16> zeroinitializer, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
  %47 = load <61 x i16>, ptr %arr_4, align 2
  %strided.vec922 = shufflevector <61 x i16> %47, <61 x i16> zeroinitializer, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
  %48 = load <61 x i16>, ptr %arr_3, align 2
  %strided.vec924 = shufflevector <61 x i16> %48, <61 x i16> zeroinitializer, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
  ; OR each strided vector with its sign-extended byte vector, compare the
  ; result against zero, and use the lane mask to select a <16 x i8> value.
  %49 = or <16 x i16> %strided.vec, %41
  %50 = or <16 x i16> %strided.vec920, %42
  %51 = or <16 x i16> %strided.vec922, %43
  %52 = or <16 x i16> %strided.vec924, %44
  %53 = icmp eq <16 x i16> %49, zeroinitializer
  %54 = icmp eq <16 x i16> %50, zeroinitializer
  %55 = icmp eq <16 x i16> %51, zeroinitializer
  %56 = icmp eq <16 x i16> %52, zeroinitializer
  %57 = select <16 x i1> %53, <16 x i8> %26, <16 x i8> zeroinitializer
  %58 = select <16 x i1> %54, <16 x i8> <i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> zeroinitializer
  %59 = select <16 x i1> %55, <16 x i8> %29, <16 x i8> zeroinitializer
  %60 = select <16 x i1> %56, <16 x i8> %40, <16 x i8> splat (i8 1)
  ; XOR the selected values into the loop-carried accumulators (these feed the
  ; phis at the top of the block).
  %61 = xor <16 x i8> %57, %vec.phi914
  %62 = xor <16 x i8> %58, %vec.phi915
  %63 = xor <16 x i8> %59, %vec.phi916
  %64 = xor <16 x i8> %60, %vec.phi917
  %65 = getelementptr i64, ptr %arr_5, i64 %20
  ; Long run of `store i64 0` through the pointer arguments and %65; many of
  ; them target %arr_3 / %arr_4 repeatedly.
  ; NOTE(review): presumably this store sequence is what the AArch64
  ; load/store optimizer acts on -- keep the order exactly as is.
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %1, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %0, align 8
  store i64 0, ptr %arr_5, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %10, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %8, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %17, align 8
  store i64 0, ptr %12, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %18, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %15, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %65, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %13, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %14, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %11, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %3, align 8
  store i64 0, ptr %arr_4, align 8
  store i64 0, ptr %arr_3, align 8
  store i64 0, ptr %5, align 8
  store i64 0, ptr %6, align 8
  ; The index advances by 64 per iteration. %66 is computed but has no users:
  ; the loop exit is controlled solely by the %19 argument.
  %index.next925 = add i64 %index913, 64
  %66 = icmp eq i64 %index913, 0
  br i1 %19, label %middle.block926, label %vector.body912

middle.block926:                                  ; preds = %vector.body912
  ; The three xor results below have no users; the function returns void.
  %bin.rdx927 = xor <16 x i8> %vec.phi915, %vec.phi914
  %bin.rdx928 = xor <16 x i8> %vec.phi916, zeroinitializer
  %bin.rdx929 = xor <16 x i8> %vec.phi917, zeroinitializer
  ret void
}

; Attribute group applied to the function above via `#0`.
attributes #0 = { "frame-pointer"="non-leaf" "target-cpu"="grace" }

CC: @davemgreen , @rj-jesus

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions