Skip to content

Commit

Permalink
[hardware] Add support for 16alt, 8, 8alt
Browse files Browse the repository at this point in the history
  • Loading branch information
mp-17 committed Jun 26, 2024
1 parent 9992f90 commit de673f6
Show file tree
Hide file tree
Showing 10 changed files with 229 additions and 48 deletions.
79 changes: 67 additions & 12 deletions hardware/include/ara_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,42 @@ package ara_pkg;
FPExtSupportEnable = 1'b1
} fpext_support_e;

// The three bits correspond to {RVVD, RVVF, RVVH}
typedef enum logic [2:0] {
FPUSupportNone = 3'b000,
FPUSupportHalf = 3'b001,
FPUSupportSingle = 3'b010,
FPUSupportHalfSingle = 3'b011,
FPUSupportDouble = 3'b100,
FPUSupportSingleDouble = 3'b110,
FPUSupportHalfSingleDouble = 3'b111
// The six bits correspond to {RVVD, RVVF, RVVH, RVVHA, RVVB, RVVBA}
typedef enum logic [5:0] {
FPUSupportNone = 6'b000000,
FPUSupportHalf = 6'b001000,
FPUSupportSingle = 6'b010000,
FPUSupportHalfSingle = 6'b011000,
FPUSupportDouble = 6'b100000,
FPUSupportSingleDouble = 6'b110000,
FPUSupportHalfSingleDouble = 6'b111000,
FPUSupportAll = 6'b111111
} fpu_support_e;

function automatic logic RVVD(fpu_support_e e);
return e[2];
return e[5];
endfunction : RVVD

function automatic logic RVVF(fpu_support_e e);
return e[1];
return e[4];
endfunction : RVVF

function automatic logic RVVH(fpu_support_e e);
return e[0];
return e[3];
endfunction : RVVH

// Alternate half-precision support flag: bit 2 of the FPU support mask.
function automatic logic RVVHA(fpu_support_e e);
  RVVHA = e[2];
endfunction : RVVHA

// Byte-precision (8-bit) floating-point support flag: bit 1 of the FPU support mask.
function automatic logic RVVB(fpu_support_e e);
  RVVB = e[1];
endfunction : RVVB

// Alternate byte-precision floating-point support flag: bit 0 of the FPU support mask.
function automatic logic RVVBA(fpu_support_e e);
  RVVBA = e[0];
endfunction : RVVBA

// Multiplier latencies.
localparam int unsigned LatMultiplierEW64 = 1;
localparam int unsigned LatMultiplierEW32 = 1;
Expand Down Expand Up @@ -218,6 +231,24 @@ package ara_pkg;
} resize_e;

// Floating-Point structs for re-encoding during widening FP operations
// Alternate 8-bit floating-point layout (MSB first): 1 sign, 4 exponent, 3 mantissa bits.
typedef struct packed {
logic s; // sign
logic [3:0] e; // exponent
logic [2:0] m; // mantissa
} fp8alt_t;

// 8-bit floating-point layout (MSB first): 1 sign, 5 exponent, 2 mantissa bits.
typedef struct packed {
logic s; // sign
logic [4:0] e; // exponent
logic [1:0] m; // mantissa
} fp8_t;

// Alternate 16-bit floating-point layout (MSB first): 1 sign, 8 exponent, 7 mantissa bits.
typedef struct packed {
logic s; // sign
logic [7:0] e; // exponent
logic [6:0] m; // mantissa
} fp16alt_t;

typedef struct packed {
logic s;
logic [4:0] e;
Expand Down Expand Up @@ -248,6 +279,30 @@ package ara_pkg;
endcase
endfunction

// Widen an fp8_t value (1 sign, 5 exponent, 2 mantissa bits) to fp16_t.
//
// fp8_t and fp16_t carry the same 5-bit exponent field, hence the same bias (15)
// and the same exponent range. The exponent therefore must NOT be re-biased:
// adding an offset would wrap around on the 5-bit field (e.g. 1.0, e = 15,
// would come out with e = 23). Zero, subnormal, normal and infinite inputs all
// map to the same exponent with the mantissa left-aligned in the wider field;
// an fp8 subnormal is also an fp16 subnormal, so no normalization is needed.
//
// Arguments:
//   fp8       - the 8-bit operand to widen.
//   fp8_m_lzc - leading-zero count of fp8.m. Kept so that existing callers do
//               not change; it is unused because no normalization shift is
//               required when the exponent ranges are identical.
// Returns the fp16_t encoding of the same value; any NaN input is returned as
// a quiet NaN (mantissa MSB set, remaining mantissa bits cleared).
function fp16_t fp16_from_fp8(fp8_t fp8, logic [$clog2(fp_mantissa_bits(rvv_pkg::EW8, 0)):0] fp8_m_lzc);
  automatic fp16_t fp16;

  // Wide sign
  fp16.s = fp8.s;

  // Wide exponent: identical width and bias, copy as is.
  fp16.e = fp8.e;

  // Wide mantissa
  // If the input is NaN, output a quiet NaN mantissa.
  // Otherwise, append trailing zeros to the mantissa.
  fp16.m = ((fp8.e == '1) && (fp8.m != '0)) ? {1'b1, 9'b0} : {fp8.m, 8'b0};

  fp16_from_fp8 = fp16;
endfunction

function fp32_t fp32_from_fp16(fp16_t fp16, logic [$clog2(fp_mantissa_bits(rvv_pkg::EW16, 0)):0] fp16_m_lzc);
automatic fp16_t fp16_temp;
automatic fp32_t fp32;
Expand Down
2 changes: 1 addition & 1 deletion hardware/src/ara.sv
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module ara import ara_pkg::*; #(
// RVV Parameters
parameter int unsigned NrLanes = 0, // Number of parallel vector lanes.
// Support for floating-point data types
parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble,
parameter fpu_support_e FPUSupport = FPUSupportAll,
// External support for vfrec7, vfrsqrt7
parameter fpext_support_e FPExtSupport = FPExtSupportEnable,
// Support for fixed-point data types
Expand Down
7 changes: 5 additions & 2 deletions hardware/src/ara_dispatcher.sv
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
parameter int unsigned NrLanes = 0,
// Support for floating-point data types
parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble,
parameter fpu_support_e FPUSupport = FPUSupportAll,
// External support for vfrec7, vfrsqrt7
parameter fpext_support_e FPExtSupport = FPExtSupportEnable,
// Support for fixed-point data types
Expand Down Expand Up @@ -2230,9 +2230,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
end
end

// Ara can support 16-bit float, 32-bit float, 64-bit float.
// Ara can support 8-bit float, 16-bit float, 32-bit float, 64-bit float.
// Ara cannot support instructions that operate on more than 64 bits.
unique case (FPUSupport)
FPUSupportAll: if (int'(ara_req_d.vtype.vsew) > int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64))
illegal_insn = 1'b1;
FPUSupportHalfSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW16) ||
int'(ara_req_d.vtype.vsew) > int'(EW64) || int'(ara_req_d.eew_vs2) > int'(EW64))
illegal_insn = 1'b1;
Expand Down Expand Up @@ -2476,6 +2478,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
// Ara can support 8-bit float, 16-bit float, 32-bit float, 64-bit float.
// Ara cannot support instructions that operate on more than 64 bits.
unique case (FPUSupport)
FPUSupportAll: if (int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1;
FPUSupportHalfSingleDouble: if (int'(ara_req_d.vtype.vsew) < int'(EW16) ||
int'(ara_req_d.vtype.vsew) > int'(EW64)) illegal_insn = 1'b1;
FPUSupportHalfSingle: if (int'(ara_req_d.vtype.vsew) < int'(EW16) ||
Expand Down
26 changes: 19 additions & 7 deletions hardware/src/ara_soc.sv
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #(
// RVV Parameters
parameter int unsigned NrLanes = 0, // Number of parallel vector lanes.
// Support for floating-point data types
parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble,
parameter fpu_support_e FPUSupport = FPUSupportAll,
// External support for vfrec7, vfrsqrt7
parameter fpext_support_e FPExtSupport = FPExtSupportEnable,
// Support for fixed-point data types
Expand Down Expand Up @@ -449,10 +449,10 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #(
AxiUserWidth : 1,
NrLoadBufEntries : 2,
FpuEn : 1,
XF16 : FPUSupport[0],
XF16ALT : 0,
XF8 : 0,
XF8ALT : 1,
XF16 : FPUSupport[3],
XF16ALT : FPUSupport[2],
XF8 : FPUSupport[1],
XF8ALT : FPUSupport[0],
RVA : 1,
RVB : 0,
RVV : 1,
Expand All @@ -463,8 +463,8 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #(
CvxifEn : 0,
ZiCondExtEn : 0,
RVSCLIC : 0,
RVF : FPUSupport[1],
RVD : FPUSupport[2],
RVF : FPUSupport[4],
RVD : FPUSupport[5],
FpPresent : 1,
NSX : 0,
FLen : 64,
Expand Down Expand Up @@ -614,4 +614,16 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #(
$error(
"[ara] Cannot support half-precision floating-point on Ara if CVA6 does not support it.");

// Each check below raises an error when FPUSupport enables a format on the
// Ara side while the matching CVA6AraConfig flag is cleared.

// Alternate half-precision: RVVHA vs. XF16ALT.
if (RVVHA(FPUSupport) && !CVA6AraConfig.XF16ALT)
$error(
"[ara] Cannot support alt-half-precision floating-point on Ara if CVA6 does not support it.");

// Byte-precision: RVVB vs. XF8.
if (RVVB(FPUSupport) && !CVA6AraConfig.XF8)
$error(
"[ara] Cannot support byte-precision floating-point on Ara if CVA6 does not support it.");

// Alternate byte-precision: RVVBA vs. XF8ALT.
if (RVVBA(FPUSupport) && !CVA6AraConfig.XF8ALT)
$error(
"[ara] Cannot support alt-byte-precision floating-point on Ara if CVA6 does not support it.");

endmodule : ara_soc
2 changes: 1 addition & 1 deletion hardware/src/ara_system.sv
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #(
// RVV Parameters
parameter int unsigned NrLanes = 0, // Number of parallel vector lanes.
// Support for floating-point data types
parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble,
parameter fpu_support_e FPUSupport = FPUSupportAll,
// External support for vfrec7, vfrsqrt7
parameter fpext_support_e FPExtSupport = FPExtSupportEnable,
// Support for fixed-point data types
Expand Down
2 changes: 1 addition & 1 deletion hardware/src/lane/lane.sv
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
module lane import ara_pkg::*; import rvv_pkg::*; #(
parameter int unsigned NrLanes = 1, // Number of lanes
// Support for floating-point data types
parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble,
parameter fpu_support_e FPUSupport = FPUSupportAll,
// External support for vfrec7, vfrsqrt7
parameter fpext_support_e FPExtSupport = FPExtSupportEnable,
// Support for fixed-point data types
Expand Down
49 changes: 44 additions & 5 deletions hardware/src/lane/operand_queue.sv
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
parameter int unsigned NrSlaves = 1,
parameter int unsigned NrLanes = 0,
// Support for floating-point data types
parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble,
parameter fpu_support_e FPUSupport = FPUSupportAll,
// Supported conversions
parameter logic SupportIntExt2 = 1'b0,
parameter logic SupportIntExt4 = 1'b0,
Expand Down Expand Up @@ -153,9 +153,11 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
// Floating-point conversion //
////////////////////////////////

logic [$clog2(fp_mantissa_bits(EW8, 0))-1:0] fp8_m_lzc[4]; // 2 bits each
logic [$clog2(fp_mantissa_bits(EW16, 0))-1:0] fp16_m_lzc[2]; // 4 bits each
logic [$clog2(fp_mantissa_bits(EW32, 0))-1:0] fp32_m_lzc; // 5 bits each

fp8_t fp8[4];
fp16_t fp16[2];
fp32_t fp32;

Expand All @@ -166,6 +168,20 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
// By knowing the number of leading zeros in the mantissa, we can properly
// adjust the exponent and shift the binary point to achieve a normalized
// representation of the number.
// 8-bit source elements: one leading-zero counter per fp8 mantissa. Only
// generated when both the 8-bit and the 16-bit formats are enabled.
if (RVVB(FPUSupport) && RVVH(FPUSupport)) begin
  // sew: 8-bit
  for (genvar el = 0; el < 4; el++) begin
    lzc #(
      .WIDTH(fp_mantissa_bits(EW8, 0)),
      .MODE (1)
    ) leading_zero_e8_i (
      .in_i   (fp8[el].m    ),
      .cnt_o  (fp8_m_lzc[el]),
      .empty_o(/*Unused*/   )
    );
  end
end

if ({RVVH(FPUSupport), RVVF(FPUSupport)} == 2'b11) begin
// sew: 16-bit
for (genvar i = 0; i < 2; i++) begin
Expand Down Expand Up @@ -202,6 +218,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
incomplete_packet = 1'b0;
last_packet = 1'b0;

for (int i = 0; i < 4; i++) fp8[i] = '0;
for (int i = 0; i < 2; i++) fp16[i] = '0;
for (int i = 0; i < 1; i++) fp32[i] = '0;

Expand Down Expand Up @@ -243,6 +260,13 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
end
MFPU_ADDRGEN: begin
unique case (cmd.eew)
EW8: begin
  // Neutral values for 8-bit floating-point elements, one per byte of the
  // 64-bit word. The replicated literal must be 8 bits wide: replicating a
  // 16-bit literal eight times yields 128 bits, which is truncated to 64
  // and leaves every other byte at zero.
  // The encodings mirror the EW16 case (16'h7c00, i.e. all-ones exponent and
  // zero mantissa) for the fp8_t layout {s, e[4:0], m[1:0]}:
  //   8'h7c = 0_11111_00, 8'hfc = 1_11111_00.
  unique case (cmd.ntr_red)
    2'b01: ntr.w64 = {8{8'h7c}};
    2'b10: ntr.w64 = {8{8'hfc}};
    default:;
  endcase
end
EW16: begin
unique case (cmd.ntr_red)
2'b01: ntr.w64 = {4{16'h7c00}};
Expand Down Expand Up @@ -371,17 +395,31 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
end
end

// Floating-Point re-encoding
// Floating-Point re-encoding (not supported for alt-16 and alt-8)
OpQueueConversionWideFP2: begin
if (FPUSupport != FPUSupportNone) begin
unique casez ({cmd.eew, RVVH(FPUSupport), RVVF(FPUSupport), RVVD(FPUSupport)})
{EW16, 1'b1, 1'b1, 1'b?}: begin
unique casez ({cmd.eew, RVVBA(FPUSupport), RVVB(FPUSupport),
RVVHA(FPUSupport), RVVH(FPUSupport),
RVVF(FPUSupport), RVVD(FPUSupport)})
{EW8, 1'b?, 1'b1, 1'b?, 1'b1, 1'b?, 1'b?}: begin
  // 8-bit -> 16-bit widening. Four 8-bit source elements, one per 16-bit
  // slot of the input buffer, produce four 16-bit results that fill the
  // whole converted operand (same scheme as the 2 x 32-bit and 1 x 64-bit
  // cases below).
  // The module-level fp8[] array is written here (instead of a shadowing
  // local) so that the per-element leading-zero counters observe the
  // mantissas, and the re-encoding is delegated to the package helper, as
  // done for the wider formats.
  for (int e = 0; e < 4; e++) begin
    fp8[e] = ibuf_operand[8*select + 16*e +: 8];
    conv_operand[16*e +: 16] = fp16_from_fp8(fp8[e], fp8_m_lzc[e]);
  end
end
{EW16, 1'b?, 1'b?, 1'b?, 1'b1, 1'b1, 1'b?}: begin
for (int e = 0; e < 2; e++) begin
fp16[e] = ibuf_operand[8*select + 32*e +: 16];
conv_operand[32*e +: 32] = fp32_from_fp16(fp16[e], fp16_m_lzc[e]);
end
end
{EW32, 1'b?, 1'b1, 1'b1}: begin
{EW32, 1'b?, 1'b?, 1'b?, 1'b?, 1'b1, 1'b1}: begin
fp32 = ibuf_operand[8*select +: 32];
conv_operand = fp64_from_fp32(fp32, fp32_m_lzc);
end
Expand All @@ -393,6 +431,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
// Zero extension + Reordering for FP conversions
OpQueueAdjustFPCvt: begin
unique case (cmd.eew)
// Four 8-bit elements, taken from the four 16-bit slots of the input buffer
// (offsets 0, 16, 32, 48) and packed in ascending order into the low 32 bits.
EW8: conv_operand = {32'b0, ibuf_operand[48 + 8*select +: 8], ibuf_operand[32 + 8*select +: 8], ibuf_operand[16 + 8*select +: 8], ibuf_operand[8*select +: 8]};
EW16: conv_operand = {32'b0, ibuf_operand[32 + 8*select +: 16], ibuf_operand[8*select +: 16]};
EW32: conv_operand = {32'b0, ibuf_operand[8*select +: 32]};
default:;
Expand Down
2 changes: 1 addition & 1 deletion hardware/src/lane/operand_queues_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; #(
parameter int unsigned NrLanes = 0,
// Support for floating-point data types
parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble
parameter fpu_support_e FPUSupport = FPUSupportAll
) (
input logic clk_i,
input logic rst_ni,
Expand Down
2 changes: 1 addition & 1 deletion hardware/src/lane/vector_fus_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
module vector_fus_stage import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; #(
parameter int unsigned NrLanes = 0,
// Support for floating-point data types
parameter fpu_support_e FPUSupport = FPUSupportHalfSingleDouble,
parameter fpu_support_e FPUSupport = FPUSupportAll,
// External support for vfrec7, vfrsqrt7
parameter fpext_support_e FPExtSupport = FPExtSupportEnable,
// Support for fixed-point data types
Expand Down
Loading

0 comments on commit de673f6

Please sign in to comment.