[hardware] Add support for 16alt, 8, 8alt

pulp-platform · Jun 26, 2024 · de673f6 · de673f6
1 parent 9992f90
commit de673f6
Show file tree

Hide file tree

Showing 10 changed files with 229 additions and 48 deletions.
diff --git a/hardware/include/ara_pkg.sv b/hardware/include/ara_pkg.sv
@@ -45,29 +45,42 @@ package ara_pkg;
     FPExtSupportEnable  = 1'b1
   } fpext_support_e;
 
-  // The three bits correspond to {RVVD, RVVF, RVVH}
-  typedef enum logic [2:0] {
-    FPUSupportNone             = 3'b000,
-    FPUSupportHalf             = 3'b001,
-    FPUSupportSingle           = 3'b010,
-    FPUSupportHalfSingle       = 3'b011,
-    FPUSupportDouble           = 3'b100,
-    FPUSupportSingleDouble     = 3'b110,
-    FPUSupportHalfSingleDouble = 3'b111
+  // The six bits correspond to {RVVD, RVVF, RVVH, RVVHA, RVVB, RVVBA}, i.e. bit 5 = RVVD down to bit 0 = RVVBA
+  typedef enum logic [5:0] {
+    FPUSupportNone             = 6'b000000, // no format bit set
+    FPUSupportHalf             = 6'b001000, // RVVH only
+    FPUSupportSingle           = 6'b010000, // RVVF only
+    FPUSupportHalfSingle       = 6'b011000, // RVVF + RVVH
+    FPUSupportDouble           = 6'b100000, // RVVD only
+    FPUSupportSingleDouble     = 6'b110000, // RVVD + RVVF
+    FPUSupportHalfSingleDouble = 6'b111000, // RVVD + RVVF + RVVH, the three lower (RVVHA/RVVB/RVVBA) bits stay clear
+    FPUSupportAll              = 6'b111111  // every bit set, including RVVHA, RVVB and RVVBA
   } fpu_support_e;
 
   function automatic logic RVVD(fpu_support_e e);
-    return e[2];
+    return e[5]; // bit 5 (MSB) of fpu_support_e: set in the FPUSupport*Double presets and FPUSupportAll
   endfunction : RVVD
 
   function automatic logic RVVF(fpu_support_e e);
-    return e[1];
+    return e[4]; // bit 4 of fpu_support_e: set in the FPUSupport*Single* presets and FPUSupportAll
   endfunction : RVVF
 
   function automatic logic RVVH(fpu_support_e e);
-    return e[0];
+    return e[3]; // bit 3 of fpu_support_e: set in the FPUSupportHalf* presets and FPUSupportAll
   endfunction : RVVH
 
+  function automatic logic RVVHA(fpu_support_e e);
+    return e[2]; // bit 2 of fpu_support_e: 16alt ("alt-half-precision") support, only set in FPUSupportAll
+  endfunction : RVVHA
+
+  function automatic logic RVVB(fpu_support_e e);
+    return e[1]; // bit 1 of fpu_support_e: 8-bit ("byte-precision") FP support, only set in FPUSupportAll
+  endfunction : RVVB
+
+  function automatic logic RVVBA(fpu_support_e e);
+    return e[0]; // bit 0 (LSB) of fpu_support_e: 8alt ("alt-byte-precision") FP support, only set in FPUSupportAll
+  endfunction : RVVBA
+
   // Multiplier latencies.
   localparam int unsigned LatMultiplierEW64 = 1;
   localparam int unsigned LatMultiplierEW32 = 1;
@@ -218,6 +231,24 @@ package ara_pkg;
   } resize_e;
 
   // Floating-Point structs for re-encoding during widening FP operations
+  typedef struct packed {
+    logic s;       // sign
+    logic [3:0] e; // exponent, 4 bits
+    logic [2:0] m; // mantissa, 3 bits
+  } fp8alt_t; // 8-bit alt format: 1 + 4 + 3 bits, sign in the MSB (packed struct)
+
+  typedef struct packed {
+    logic s;       // sign
+    logic [4:0] e; // exponent, 5 bits
+    logic [1:0] m; // mantissa, 2 bits
+  } fp8_t; // 8-bit format: 1 + 5 + 2 bits, sign in the MSB (packed struct)
+
+  typedef struct packed {
+    logic s;       // sign
+    logic [7:0] e; // exponent, 8 bits
+    logic [6:0] m; // mantissa, 7 bits
+  } fp16alt_t; // 16-bit alt format: 1 + 8 + 7 bits, sign in the MSB (packed struct)
+
   typedef struct packed {
     logic s;
     logic [4:0] e;
@@ -248,6 +279,30 @@ package ara_pkg;
     endcase
   endfunction
 
+  // Widen an fp8_t (1-5-2) value to fp16_t (1-5-10).
+  // Both types carry a 5-bit exponent field, so (assuming the usual IEEE-style bias for
+  // that width) the exponent is copied as-is and subnormals need no normalization shift.
+  // fp8_m_lzc is therefore unused; it is kept so the signature mirrors fp32_from_fp16.
+  function fp16_t fp16_from_fp8(fp8_t fp8, logic [$clog2(fp_mantissa_bits(rvv_pkg::EW8, 0)):0] fp8_m_lzc);
+    automatic fp16_t fp16;
+
+    // Wide sign
+    fp16.s = fp8.s;
+
+    // Wide exponent
+    // Zero, subnormal, normal and Inf/NaN encodings all keep the same exponent bits.
+    // A "+8" re-bias (15 - 7) only suits a 4-bit exponent such as fp8alt_t; applied
+    // to fp8_t it would overflow the 5-bit result for fp8.e >= 24.
+    fp16.e = fp8.e;
+
+    // Wide mantissa
+    // If the input is NaN, output a quiet NaN mantissa.
+    // Otherwise, append trailing zeros to the mantissa.
+    fp16.m = ((fp8.e == '1) && (fp8.m != '0)) ? {1'b1, 9'b0} : {fp8.m, 8'b0};
+
+    fp16_from_fp8 = fp16;
+  endfunction
+
   function fp32_t fp32_from_fp16(fp16_t fp16, logic [$clog2(fp_mantissa_bits(rvv_pkg::EW16, 0)):0] fp16_m_lzc);
     automatic fp16_t fp16_temp;
     automatic fp32_t fp32;

diff --git a/hardware/src/ara.sv b/hardware/src/ara.sv
@@ -10,7 +10,7 @@ module ara import ara_pkg::*; #(
     // RVV Parameters
     parameter  int           unsigned NrLanes      = 0,                          // Number of parallel vector lanes.
     // Support for floating-point data types
-    parameter  fpu_support_e          FPUSupport   = FPUSupportHalfSingleDouble,
+    parameter  fpu_support_e          FPUSupport   = FPUSupportAll,
     // External support for vfrec7, vfrsqrt7
     parameter  fpext_support_e        FPExtSupport = FPExtSupportEnable,
     // Support for fixed-point data types

diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv
@@ -11,7 +11,7 @@
 module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
     parameter int           unsigned NrLanes      = 0,
     // Support for floating-point data types
-    parameter fpu_support_e          FPUSupport   = FPUSupportHalfSingleDouble,
+    parameter fpu_support_e          FPUSupport   = FPUSupportAll,
     // External support for vfrec7, vfrsqrt7
     parameter fpext_support_e        FPExtSupport = FPExtSupportEnable,
     // Support for fixed-point data types
@@ -2230,9 +2230,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     end
                   end
 
-                  // Ara can support 16-bit float, 32-bit float, 64-bit float.
+                  // Ara can support 8-bit float, 16-bit float, 32-bit float, 64-bit float.
                   // Ara cannot support instructions who operates on more than 64 bits.
                   unique case (FPUSupport)
+                    FPUSupportAll: if (int'(ara_req_d.vtype.vsew) > int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64))
+                          illegal_insn = 1'b1;
                     FPUSupportHalfSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW16) ||
                           int'(ara_req_d.vtype.vsew) > int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64))
                           illegal_insn = 1'b1;
@@ -2476,6 +2478,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                   // Ara can support 16-bit float, 32-bit float, 64-bit float.
                   // Ara cannot support instructions who operates on more than 64 bits.
                   unique case (FPUSupport)
+                    FPUSupportAll: if (int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1;
                     FPUSupportHalfSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW16) ||
                           int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1;
                     FPUSupportHalfSingle: if (int'(ara_req_d.vtype.vsew) < int'(EW16) ||

diff --git a/hardware/src/ara_soc.sv b/hardware/src/ara_soc.sv
@@ -10,7 +10,7 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #(
     // RVV Parameters
     parameter  int           unsigned NrLanes      = 0,                          // Number of parallel vector lanes.
     // Support for floating-point data types
-    parameter  fpu_support_e          FPUSupport   = FPUSupportHalfSingleDouble,
+    parameter  fpu_support_e          FPUSupport   = FPUSupportAll,
     // External support for vfrec7, vfrsqrt7
     parameter  fpext_support_e        FPExtSupport = FPExtSupportEnable,
     // Support for fixed-point data types
@@ -449,10 +449,10 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #(
     AxiUserWidth          : 1,
     NrLoadBufEntries      : 2,
     FpuEn                 : 1,
-    XF16                  : FPUSupport[0],
-    XF16ALT               : 0,
-    XF8                   : 0,
-    XF8ALT                : 1,
+    XF16                  : RVVH(FPUSupport),  // ara_pkg accessors keep the fpu_support_e bit layout in one place (same bits as [3], [2], [1], [0])
+    XF16ALT               : RVVHA(FPUSupport), // 16alt support bit
+    XF8                   : RVVB(FPUSupport),  // 8-bit FP support bit
+    XF8ALT                : RVVBA(FPUSupport), // 8alt support bit
     RVA                   : 1,
     RVB                   : 0,
     RVV                   : 1,
@@ -463,8 +463,8 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #(
     CvxifEn               : 0,
     ZiCondExtEn           : 0,
     RVSCLIC               : 0,
-    RVF                   : FPUSupport[1],
-    RVD                   : FPUSupport[2],
+    RVF                   : RVVF(FPUSupport), // ara_pkg accessor instead of a raw index: same bit as FPUSupport[4]
+    RVD                   : RVVD(FPUSupport), // ara_pkg accessor instead of a raw index: same bit as FPUSupport[5]
     FpPresent             : 1,
     NSX                   : 0,
     FLen                  : 64,
@@ -614,4 +614,16 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #(
     $error(
       "[ara] Cannot support half-precision floating-point on Ara if CVA6 does not support it.");
 
+  if (RVVHA(FPUSupport) && !CVA6AraConfig.XF16ALT) // Ara 16alt support requires XF16ALT in the CVA6 configuration
+    $error(
+      "[ara] Cannot support alt-half-precision floating-point on Ara if CVA6 does not support it.");
+
+  if (RVVB(FPUSupport) && !CVA6AraConfig.XF8) // Ara 8-bit FP support requires XF8 in the CVA6 configuration
+    $error(
+      "[ara] Cannot support byte-precision floating-point on Ara if CVA6 does not support it.");
+
+  if (RVVBA(FPUSupport) && !CVA6AraConfig.XF8ALT) // Ara 8alt support requires XF8ALT. NOTE(review): if CVA6AraConfig is the struct whose XF* fields are derived from FPUSupport in this module, these three checks cannot fire - confirm
+    $error(
+      "[ara] Cannot support alt-byte-precision floating-point on Ara if CVA6 does not support it.");
+
 endmodule : ara_soc
diff --git a/hardware/src/ara_system.sv b/hardware/src/ara_system.sv
@@ -10,7 +10,7 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #(
     // RVV Parameters
     parameter int                      unsigned NrLanes            = 0,                               // Number of parallel vector lanes.
     // Support for floating-point data types
-    parameter fpu_support_e                     FPUSupport         = FPUSupportHalfSingleDouble,
+    parameter fpu_support_e                     FPUSupport         = FPUSupportAll,
     // External support for vfrec7, vfrsqrt7
     parameter fpext_support_e                   FPExtSupport       = FPExtSupportEnable,
     // Support for fixed-point data types

diff --git a/hardware/src/lane/lane.sv b/hardware/src/lane/lane.sv
@@ -12,7 +12,7 @@
 module lane import ara_pkg::*; import rvv_pkg::*; #(
     parameter  int           unsigned NrLanes         = 1, // Number of lanes
     // Support for floating-point data types
-    parameter  fpu_support_e          FPUSupport      = FPUSupportHalfSingleDouble,
+    parameter  fpu_support_e          FPUSupport      = FPUSupportAll,
     // External support for vfrec7, vfrsqrt7
     parameter  fpext_support_e        FPExtSupport    = FPExtSupportEnable,
     // Support for fixed-point data types

diff --git a/hardware/src/lane/operand_queue.sv b/hardware/src/lane/operand_queue.sv
@@ -14,7 +14,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
     parameter  int           unsigned NrSlaves       = 1,
     parameter  int           unsigned NrLanes        = 0,
     // Support for floating-point data types
-    parameter  fpu_support_e          FPUSupport     = FPUSupportHalfSingleDouble,
+    parameter  fpu_support_e          FPUSupport     = FPUSupportAll,
     // Supported conversions
     parameter  logic                  SupportIntExt2 = 1'b0,
     parameter  logic                  SupportIntExt4 = 1'b0,
@@ -153,9 +153,11 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
   //  Floating-point conversion //
   ////////////////////////////////
 
+  logic [$clog2(fp_mantissa_bits(EW8, 0))-1:0]  fp8_m_lzc[4];  // mantissa leading-zero counts, one per fp8 element. NOTE(review): "2 bits each" holds only if fp_mantissa_bits(EW8, 0) is 3 or 4; fp8_t.m is 2 bits wide, which would give 1 bit - confirm
   logic [$clog2(fp_mantissa_bits(EW16, 0))-1:0] fp16_m_lzc[2]; // 4 bits each
   logic [$clog2(fp_mantissa_bits(EW32, 0))-1:0] fp32_m_lzc;    // 5 bits each
 
+  fp8_t  fp8[4];
   fp16_t fp16[2];
   fp32_t fp32;
 
@@ -166,6 +168,20 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
   // By knowing the number of leading zeros in the mantissa, we can properly
   // adjust the exponent and shift the binary point to achieve a normalized
   // representation of the number.
+  if ({RVVB(FPUSupport), RVVH(FPUSupport)} == 2'b11) begin // only generated when both 8-bit and 16-bit (half) FP are enabled
+    // sew: 8-bit - one counter per entry of fp8[4]
+    for (genvar i = 0; i < 4; i++) begin
+      lzc #(
+        .WIDTH(fp_mantissa_bits(EW8, 0)), // counter input width = EW8 mantissa width
+        .MODE (1)
+      ) leading_zero_e8_i (
+        .in_i   (fp8[i].m    ), // NOTE(review): the hunks shown only ever assign '0 to the module-level fp8[] array - confirm it is driven from ibuf_operand
+        .cnt_o  (fp8_m_lzc[i]),
+        .empty_o(/*Unused*/   )
+      );
+    end
+  end
+
   if ({RVVH(FPUSupport), RVVF(FPUSupport)} == 2'b11) begin
     // sew: 16-bit
     for (genvar i = 0; i < 2; i++) begin
@@ -202,6 +218,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
     incomplete_packet = 1'b0;
     last_packet       = 1'b0;
 
+    for (int i = 0; i < 4; i++) fp8[i]  = '0;
     for (int i = 0; i < 2; i++) fp16[i] = '0;
     for (int i = 0; i < 1; i++) fp32[i] = '0;
 
@@ -243,6 +260,13 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
           end
           MFPU_ADDRGEN: begin
             unique case (cmd.eew)
+              EW8: begin // 8-bit FP neutral values: an 8-bit literal replicated 8x exactly fills the 64-bit word (a 16-bit literal would build 128 bits and be truncated)
+                unique case (cmd.ntr_red)
+                  2'b01: ntr.w64 = {8{8'h7c}}; // 0_11111_00: exponent all ones, zero mantissa for fp8_t (1-5-2); equals the top byte of the EW16 value 16'h7c00
+                  2'b10: ntr.w64 = {8{8'hfc}}; // same encoding with the sign bit set. NOTE(review): fp8alt_t (1-4-3) would need 8'h78 / 8'hf8 - confirm which format EW8 targets here
+                  default:; // other ntr_red encodings assign nothing in this branch
+                endcase
+              end
               EW16: begin
                 unique case (cmd.ntr_red)
                   2'b01: ntr.w64 = {4{16'h7c00}};
@@ -371,17 +395,31 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
         end
       end
 
-      // Floating-Point re-encoding
+      // Floating-Point re-encoding (not supported for alt-16 and alt-8)
       OpQueueConversionWideFP2: begin
         if (FPUSupport != FPUSupportNone) begin
-          unique casez ({cmd.eew, RVVH(FPUSupport), RVVF(FPUSupport), RVVD(FPUSupport)})
-            {EW16, 1'b1, 1'b1, 1'b?}: begin
+          unique casez ({cmd.eew, RVVBA(FPUSupport), RVVB(FPUSupport),
+                                  RVVHA(FPUSupport), RVVH(FPUSupport),
+                                  RVVF(FPUSupport),  RVVD(FPUSupport)})
+            {EW8, 1'b?, 1'b1, 1'b?, 1'b1, 1'b?, 1'b?}: begin
+              for (int e = 0; e < 4; e++) begin // fp8 -> fp16: four 8-bit inputs at a 16-bit stride fill all 64 bits of conv_operand
+                automatic fp8_t  fp8_in = ibuf_operand[8*select + 16*e +: 8]; // local names chosen not to shadow the module-level fp8[] / fp16[] arrays
+                automatic fp16_t fp16_out;
+                fp16_out.s = fp8_in.s;
+                // fp8_t and fp16_t both have a 5-bit exponent field: copy it unchanged (no re-bias)
+                fp16_out.e = fp8_in.e;
+                // NaN -> quiet NaN mantissa; otherwise pad the 2-bit mantissa with 8 zeros to reach 10 bits
+                fp16_out.m = ((fp8_in.e == '1) && (fp8_in.m != '0)) ? {1'b1, 9'b0} : {fp8_in.m, 8'b0};
+                conv_operand[16*e +: 16] = fp16_out;
+              end
+            end
+            {EW16, 1'b?, 1'b?, 1'b?, 1'b1, 1'b1, 1'b?}: begin
               for (int e = 0; e < 2; e++) begin
                 fp16[e] = ibuf_operand[8*select + 32*e +: 16];
                 conv_operand[32*e +: 32] = fp32_from_fp16(fp16[e], fp16_m_lzc[e]);
               end
             end
-            {EW32, 1'b?, 1'b1, 1'b1}: begin
+            {EW32, 1'b?, 1'b?, 1'b?, 1'b?, 1'b1, 1'b1}: begin
               fp32 = ibuf_operand[8*select +: 32];
               conv_operand = fp64_from_fp32(fp32, fp32_m_lzc);
             end
@@ -393,6 +431,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
       // Zero extension + Reordering for FP conversions
       OpQueueAdjustFPCvt: begin
         unique case (cmd.eew)
+          EW8:  conv_operand = {32'b0, ibuf_operand[48 + 8*select +: 8], ibuf_operand[32 + 8*select +: 8], ibuf_operand[16 + 8*select +: 8], ibuf_operand[8*select +: 8]}; // four 8-bit elements at a 16-bit stride, packed in ascending order (mirrors the EW16 line's 32-bit stride); upper 32 bits zeroed
           EW16: conv_operand = {32'b0, ibuf_operand[32 + 8*select +: 16], ibuf_operand[8*select +: 16]};
           EW32: conv_operand = {32'b0, ibuf_operand[8*select +: 32]};
           default:;

diff --git a/hardware/src/lane/operand_queues_stage.sv b/hardware/src/lane/operand_queues_stage.sv
@@ -9,7 +9,7 @@
 module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; #(
     parameter int     unsigned NrLanes = 0,
     // Support for floating-point data types
-    parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble
+    parameter fpu_support_e FPUSupport = FPUSupportAll
   ) (
     input  logic                                     clk_i,
     input  logic                                     rst_ni,

diff --git a/hardware/src/lane/vector_fus_stage.sv b/hardware/src/lane/vector_fus_stage.sv
@@ -10,7 +10,7 @@
 module vector_fus_stage import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; #(
     parameter  int           unsigned NrLanes      = 0,
     // Support for floating-point data types
-    parameter  fpu_support_e          FPUSupport   = FPUSupportHalfSingleDouble,
+    parameter  fpu_support_e          FPUSupport   = FPUSupportAll,
     // External support for vfrec7, vfrsqrt7
     parameter  fpext_support_e        FPExtSupport = FPExtSupportEnable,
     // Support for fixed-point data types