From f4a9e3bc285f4d7b1ff1498cc159b162ba6e7350 Mon Sep 17 00:00:00 2001
From: Sasa Stankovic <Sasa.Stankovic@imgtec.com>
Date: Tue, 29 Jul 2014 14:39:24 +0000
Subject: [PATCH] [mips] Don't use odd-numbered single precision registers for
 fastcc calling convention if -mno-odd-spreg is used.

Differential Revision: http://reviews.llvm.org/D4682

llvm-svn: 214180
---
 llvm/lib/Target/Mips/MipsCallingConv.td |  9 ++-
 llvm/lib/Target/Mips/MipsSubtarget.h    |  1 +
 llvm/test/CodeGen/Mips/fastcc.ll        | 84 +++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsCallingConv.td b/llvm/lib/Target/Mips/MipsCallingConv.td
index 85704b9917a30..b1cd3c35782f2 100644
--- a/llvm/lib/Target/Mips/MipsCallingConv.td
+++ b/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -203,8 +203,13 @@ def CC_Mips_FastCC : CallingConv<[
       CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, V1]>>>,
 
   // f32 arguments are passed in single-precision floating pointer registers.
-  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10,
-                                 F11, F12, F13, F14, F15, F16, F17, F18, F19]>>,
+  CCIfType<[f32], CCIfSubtarget<"useOddSPReg()",
+      CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13,
+                     F14, F15, F16, F17, F18, F19]>>>,
+
+  // Don't use odd numbered single-precision registers for -mno-odd-spreg.
+  CCIfType<[f32], CCIfSubtarget<"noOddSPReg()",
+      CCAssignToReg<[F0, F2, F4, F6, F8, F10, F12, F14, F16, F18]>>>,
 
   // Stack parameter slots for i32 and f32 are 32-bit words and 4-byte aligned.
   CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
diff --git a/llvm/lib/Target/Mips/MipsSubtarget.h b/llvm/lib/Target/Mips/MipsSubtarget.h
index 70aa148d9bbb7..f3264621a7006 100644
--- a/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -203,6 +203,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
   bool isFPXX() const { return IsFPXX; }
   bool isFP64bit() const { return IsFP64bit; }
   bool useOddSPReg() const { return UseOddSPReg; }
+  bool noOddSPReg() const { return !UseOddSPReg; }
   bool isNaN2008() const { return IsNaN2008bit; }
   bool isNotFP64bit() const { return !IsFP64bit; }
   bool isGP64bit() const { return IsGP64bit; }
diff --git a/llvm/test/CodeGen/Mips/fastcc.ll b/llvm/test/CodeGen/Mips/fastcc.ll
index 8ee7af88c6b73..822902c27d2fa 100644
--- a/llvm/test/CodeGen/Mips/fastcc.ll
+++ b/llvm/test/CodeGen/Mips/fastcc.ll
@@ -1,6 +1,7 @@
 ; RUN: llc  < %s -march=mipsel | FileCheck %s 
 ; RUN: llc  < %s -mtriple=mipsel-none-nacl-gnu \
 ; RUN:  | FileCheck %s -check-prefix=CHECK-NACL
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 -mattr=+nooddspreg | FileCheck %s -check-prefix=NOODDSPREG
 
 
 @gi0 = external global i32
@@ -80,6 +81,8 @@
 @g15 = external global i32
 @g16 = external global i32
 
+@fa = common global [11 x float] zeroinitializer, align 4
+
 define void @caller0() nounwind {
 entry:
 ; CHECK: caller0
@@ -264,3 +267,84 @@ entry:
   ret void
 }
 
+define void @caller2() {
+entry:
+
+; NOODDSPREG-LABEL:  caller2
+
+; Check that first 10 arguments are passed in even float registers
+; f0, f2, ... , f18. Check that 11th argument is passed on stack.
+
+; NOODDSPREG-DAG:    lw      $[[R0:[0-9]+]], %got(fa)(${{[0-9]+|gp}})
+; NOODDSPREG-DAG:    lwc1    $f0, 0($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f2, 4($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f4, 8($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f6, 12($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f8, 16($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f10, 20($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f12, 24($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f14, 28($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f16, 32($[[R0]])
+; NOODDSPREG-DAG:    lwc1    $f18, 36($[[R0]])
+
+; NOODDSPREG-DAG:    lwc1    $[[F0:f[0-9]*[02468]]], 40($[[R0]])
+; NOODDSPREG-DAG:    swc1    $[[F0]], 0($sp)
+
+  %0 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 0), align 4
+  %1 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 1), align 4
+  %2 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 2), align 4
+  %3 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 3), align 4
+  %4 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 4), align 4
+  %5 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 5), align 4
+  %6 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 6), align 4
+  %7 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 7), align 4
+  %8 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 8), align 4
+  %9 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 9), align 4
+  %10 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 10), align 4
+  tail call fastcc void @callee2(float %0, float %1, float %2, float %3,
+                                 float %4, float %5, float %6, float %7,
+                                 float %8, float %9, float %10)
+  ret void
+}
+
+define fastcc void @callee2(float %a0, float %a1, float %a2, float %a3,
+                            float %a4, float %a5, float %a6, float %a7,
+                            float %a8, float %a9, float %a10) {
+entry:
+
+; NOODDSPREG-LABEL:  callee2
+
+; NOODDSPREG:        addiu   $sp, $sp, -[[OFFSET:[0-9]+]]
+
+; Check that first 10 arguments are received in even float registers
+; f0, f2, ... , f18. Check that 11th argument is received on stack.
+
+; NOODDSPREG-DAG:    lw      $[[R0:[0-9]+]], %got(fa)(${{[0-9]+|gp}})
+; NOODDSPREG-DAG:    swc1    $f0, 0($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f2, 4($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f4, 8($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f6, 12($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f8, 16($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f10, 20($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f12, 24($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f14, 28($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f16, 32($[[R0]])
+; NOODDSPREG-DAG:    swc1    $f18, 36($[[R0]])
+
+; NOODDSPREG-DAG:    lwc1    $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
+; NOODDSPREG-DAG:    swc1    $[[F0]], 40($[[R0]])
+
+  store float %a0, float* getelementptr ([11 x float]* @fa, i32 0, i32 0), align 4
+  store float %a1, float* getelementptr ([11 x float]* @fa, i32 0, i32 1), align 4
+  store float %a2, float* getelementptr ([11 x float]* @fa, i32 0, i32 2), align 4
+  store float %a3, float* getelementptr ([11 x float]* @fa, i32 0, i32 3), align 4
+  store float %a4, float* getelementptr ([11 x float]* @fa, i32 0, i32 4), align 4
+  store float %a5, float* getelementptr ([11 x float]* @fa, i32 0, i32 5), align 4
+  store float %a6, float* getelementptr ([11 x float]* @fa, i32 0, i32 6), align 4
+  store float %a7, float* getelementptr ([11 x float]* @fa, i32 0, i32 7), align 4
+  store float %a8, float* getelementptr ([11 x float]* @fa, i32 0, i32 8), align 4
+  store float %a9, float* getelementptr ([11 x float]* @fa, i32 0, i32 9), align 4
+  store float %a10, float* getelementptr ([11 x float]* @fa, i32 0, i32 10), align 4
+  ret void
+}
+