[NVPTX] Adapt tests to make them usable with CUDA-12.x

CUDA-12 no longer supports 32-bit compilation. Tests agnostic to 32/64 compilation mode are switched to use nvptx64. Tests that do care about it have 32-bit ptxas compilation disabled with cuda-12+. Differential Revision: https://reviews.llvm.org/D152199
llvm · Jun 6, 2023 · ef8655a · ef8655a
1 parent 6f1d21d
commit ef8655a
Show file tree

Hide file tree

Showing 130 changed files with 477 additions and 470 deletions.
diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX
 ; RUN: opt -mtriple=nvptx-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
 ; RUN: opt -mtriple=nvptx64-- < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix IR
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 @array = internal addrspace(3) global [10 x float] zeroinitializer, align 4

diff --git a/llvm/test/CodeGen/NVPTX/add-sub-128bit.ll b/llvm/test/CodeGen/NVPTX/add-sub-128bit.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=COMMON,NOCARRY
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefixes=COMMON,CARRY
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=COMMON,NOCARRY
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx43 | FileCheck %s --check-prefixes=COMMON,CARRY
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 

diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll b/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; CHECK: .visible .global .align 4 .u32 g = 42;
 ; CHECK: .visible .global .align 1 .b8 ga[4] = {0, 1, 2, 3};
-; CHECK: .visible .global .align 4 .u32 g2 = generic(g);
-; CHECK: .visible .global .align 4 .u32 g3 = g;
-; CHECK: .visible .global .align 8 .u32 g4[2] = {0, generic(g)};
-; CHECK: .visible .global .align 8 .u32 g5[2] = {0, generic(g)+8};
+; CHECK: .visible .global .align 8 .u64 g2 = generic(g);
+; CHECK: .visible .global .align 8 .u64 g3 = g;
+; CHECK: .visible .global .align 8 .u64 g4[2] = {0, generic(g)};
+; CHECK: .visible .global .align 8 .u64 g5[2] = {0, generic(g)+8};
 
 @g = addrspace(1) global i32 42
 @ga = addrspace(1) global [4 x i8] c"\00\01\02\03"
@@ -15,34 +15,34 @@
 @g4 = constant {ptr, ptr} {ptr null, ptr addrspacecast (ptr addrspace(1) @g to ptr)}
 @g5 = constant {ptr, ptr} {ptr null, ptr addrspacecast (ptr addrspace(1) getelementptr (i32, ptr addrspace(1) @g, i32 2) to ptr)}
 
-; CHECK: .visible .global .align 4 .u32 g6 = generic(ga)+2;
+; CHECK: .visible .global .align 8 .u64 g6 = generic(ga)+2;
 @g6 = addrspace(1) global ptr getelementptr inbounds (
   [4 x i8], ptr addrspacecast (ptr addrspace(1) @ga to ptr),
   i32 0, i32 2
 )
 
-; CHECK: .visible .global .align 4 .u32 g7 = generic(g);
+; CHECK: .visible .global .align 8 .u64 g7 = generic(g);
 @g7 = addrspace(1) global ptr addrspacecast (
   ptr addrspace(1) @g
   to ptr
 )
 
-; CHECK: .visible .global .align 4 .u32 g8[2] = {0, g};
+; CHECK: .visible .global .align 8 .u64 g8[2] = {0, g};
 @g8 = addrspace(1) global [2 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @g]
 
-; CHECK: .visible .global .align 4 .u32 g9[2] = {0, generic(g)};
+; CHECK: .visible .global .align 8 .u64 g9[2] = {0, generic(g)};
 @g9 = addrspace(1) global [2 x ptr] [
   ptr null,
   ptr addrspacecast (ptr addrspace(1) @g to ptr)
 ]
 
-; CHECK: .visible .global .align 4 .u32 g10[2] = {0, g};
+; CHECK: .visible .global .align 8 .u64 g10[2] = {0, g};
 @g10 = addrspace(1) global [2 x ptr addrspace(1)] [
   ptr addrspace(1) null,
   ptr addrspace(1) @g
 ]
 
-; CHECK: .visible .global .align 4 .u32 g11[2] = {0, generic(g)};
+; CHECK: .visible .global .align 8 .u64 g11[2] = {0, generic(g)};
 @g11 = addrspace(1) global [2 x ptr] [
   ptr null,
   ptr addrspacecast (ptr addrspace(1) @g to ptr)

diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast.ll b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32,G32
 ; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64,G64
 ; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr| FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64,G64
-; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
 

diff --git a/llvm/test/CodeGen/NVPTX/aggr-param.ll b/llvm/test/CodeGen/NVPTX/aggr-param.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; Make sure aggregate param types get emitted properly.
 

diff --git a/llvm/test/CodeGen/NVPTX/annotations.ll b/llvm/test/CodeGen/NVPTX/annotations.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 @texture = internal addrspace(1) global i64 0, align 8

diff --git a/llvm/test/CodeGen/NVPTX/arg-lowering.ll b/llvm/test/CodeGen/NVPTX/arg-lowering.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; CHECK: .visible .func  (.param .align 16 .b8 func_retval0[16]) foo0(
 ; CHECK:          .param .align 4 .b8 foo0_param_0[8]

diff --git a/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll b/llvm/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
 
 ;; These tests should run for all targets

diff --git a/llvm/test/CodeGen/NVPTX/arithmetic-int.ll b/llvm/test/CodeGen/NVPTX/arithmetic-int.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ;; These tests should run for all targets

diff --git a/llvm/test/CodeGen/NVPTX/async-copy.ll b/llvm/test/CodeGen/NVPTX/async-copy.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX32 %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX64 %s
-; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
+; RUN: %if ptxas-11.0 && ! ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
 ; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
 
 declare void @llvm.nvvm.cp.async.wait.group(i32)

diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 
 ; CHECK-LABEL: .func test(

diff --git a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 
 ; CHECK-LABEL: .func test_atomics_scope(

diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_32 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
 
 
 ; CHECK-LABEL: atom0

diff --git a/llvm/test/CodeGen/NVPTX/bf16.ll b/llvm/test/CodeGen/NVPTX/bf16.ll
@@ -1,23 +1,23 @@
-; RUN: llc < %s -march=nvptx | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
 
 ; LDST: .b8 bfloat_array[8] = {1, 2, 3, 4, 5, 6, 7, 8};
 @"bfloat_array" = addrspace(1) constant [4 x bfloat]
                 [bfloat 0xR0201, bfloat 0xR0403, bfloat 0xR0605, bfloat 0xR0807]
 
 define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_load_store
-; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
-; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
+; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%rd[0-9]+}}]
+; CHECK: st.global.b16 [{{%rd[0-9]+}}], [[TMP]]
   %val = load bfloat, ptr addrspace(1) %in
   store bfloat %val, ptr addrspace(1) %out
   ret void
 }
 
 define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_bitcast_from_bfloat
-; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
-; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
+; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%rd[0-9]+}}]
+; CHECK: st.global.b16 [{{%rd[0-9]+}}], [[TMP]]
   %val = load bfloat, ptr addrspace(1) %in
   %val_int = bitcast bfloat %val to i16
   store i16 %val_int, ptr addrspace(1) %out
@@ -26,8 +26,8 @@ define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %ou
 
 define void @test_bitcast_to_bfloat(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; CHECK-LABEL: @test_bitcast_to_bfloat
-; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
-; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
+; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%rd[0-9]+}}]
+; CHECK: st.global.u16 [{{%rd[0-9]+}}], [[TMP]]
   %val = load i16, ptr addrspace(1) %in
   %val_fp = bitcast i16 %val to bfloat
   store bfloat %val_fp, ptr addrspace(1) %out

diff --git a/llvm/test/CodeGen/NVPTX/bfe.ll b/llvm/test/CodeGen/NVPTX/bfe.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 
 ; CHECK: bfe0

diff --git a/llvm/test/CodeGen/NVPTX/bug17709.ll b/llvm/test/CodeGen/NVPTX/bug17709.ll
@@ -1,27 +1,27 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
-
-; ModuleID = '__kernelgen_main_module'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-target triple = "nvptx64-nvidia-cuda"
-
-define private ptx_device { double, double } @__utils1_MOD_trace(ptr noalias %m) {
-entry:
-  ;unreachable
-  %t0 = insertvalue {double, double} undef, double 1.0, 0
-  %t1 = insertvalue {double, double} %t0, double 1.0, 1
-  ret { double, double } %t1
-}
-
-%struct.descriptor_dimension.0.52 = type { i64, i64, i64 }
-%"struct.array2_complex(kind=8).37.18.70" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
-%"struct.array2_complex(kind=8).43.5.57" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
-@replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096
-
-; CHECK: .visible .entry __kernelgen_main
-define ptx_kernel void @__kernelgen_main(ptr nocapture %args, ptr) {
-entry:
-  %1 = tail call ptx_device { double, double } @__utils1_MOD_trace(ptr noalias @replacementOfAlloca8)
-  ret void
-}
-
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+
+; ModuleID = '__kernelgen_main_module'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+define private ptx_device { double, double } @__utils1_MOD_trace(ptr noalias %m) {
+entry:
+  ;unreachable
+  %t0 = insertvalue {double, double} undef, double 1.0, 0
+  %t1 = insertvalue {double, double} %t0, double 1.0, 1
+  ret { double, double } %t1
+}
+
+%struct.descriptor_dimension.0.52 = type { i64, i64, i64 }
+%"struct.array2_complex(kind=8).37.18.70" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+%"struct.array2_complex(kind=8).43.5.57" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+@replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096
+
+; CHECK: .visible .entry __kernelgen_main
+define ptx_kernel void @__kernelgen_main(ptr nocapture %args, ptr) {
+entry:
+  %1 = tail call ptx_device { double, double } @__utils1_MOD_trace(ptr noalias @replacementOfAlloca8)
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/NVPTX/bug22246.ll b/llvm/test/CodeGen/NVPTX/bug22246.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"

diff --git a/llvm/test/CodeGen/NVPTX/bug22322.ll b/llvm/test/CodeGen/NVPTX/bug22322.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"

diff --git a/llvm/test/CodeGen/NVPTX/bug26185-2.ll b/llvm/test/CodeGen/NVPTX/bug26185-2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
 
 ; Verify that we correctly emit code for extending ldg/ldu. We do not expose
 ; extending variants in the backend, but the ldg/ldu selection code may pick

diff --git a/llvm/test/CodeGen/NVPTX/bug26185.ll b/llvm/test/CodeGen/NVPTX/bug26185.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
 
 ; Verify that we correctly emit code for i8 ldg/ldu. We do not expose 8-bit
 ; registers in the backend, so these loads need special handling.

diff --git a/llvm/test/CodeGen/NVPTX/bug52623.ll b/llvm/test/CodeGen/NVPTX/bug52623.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -verify-machineinstrs
-; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -verify-machineinstrs
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
 
 ; Check that llc will not crash even when first MBB doesn't contain
 ; any instruction.

diff --git a/llvm/test/CodeGen/NVPTX/bypass-div.ll b/llvm/test/CodeGen/NVPTX/bypass-div.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
 
 ; 64-bit divides and rems should be split into a fast and slow path where
 ; the fast path uses a 32-bit operation.

diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_50 -verify-machineinstrs | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_50 -verify-machineinstrs | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_50 -verify-machineinstrs | %ptxas-verify %}
 
 ; calls with a bitcasted function symbol should be fine, but in combination with
 ; a byval attribute were causing a segfault during isel. This testcase was

diff --git a/llvm/test/CodeGen/NVPTX/callchain.ll b/llvm/test/CodeGen/NVPTX/callchain.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 target triple = "nvptx"
 

diff --git a/llvm/test/CodeGen/NVPTX/calling-conv.ll b/llvm/test/CodeGen/NVPTX/calling-conv.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 

diff --git a/llvm/test/CodeGen/NVPTX/calls-with-phi.ll b/llvm/test/CodeGen/NVPTX/calls-with-phi.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 2>&1 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
 
 ; Make sure the example doesn't crash with segfault
 

diff --git a/llvm/test/CodeGen/NVPTX/combine-min-max.ll b/llvm/test/CodeGen/NVPTX/combine-min-max.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O2 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O2 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -O2 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -O2 | %ptxas-verify %}
 
 ; *************************************
 ; * Cases with no min/max

diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ;; These tests should run for all targets

diff --git a/llvm/test/CodeGen/NVPTX/constant-vectors.ll b/llvm/test/CodeGen/NVPTX/constant-vectors.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 target triple = "nvptx-nvidia-cuda"
 

diff --git a/llvm/test/CodeGen/NVPTX/convert-fp.ll b/llvm/test/CodeGen/NVPTX/convert-fp.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 define i16 @cvt_u16_f32(float %x) {

diff --git a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
+; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 

diff --git a/llvm/test/CodeGen/NVPTX/ctlz.ll b/llvm/test/CodeGen/NVPTX/ctlz.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 

diff --git a/llvm/test/CodeGen/NVPTX/ctpop.ll b/llvm/test/CodeGen/NVPTX/ctpop.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"