diff --git a/.gitignore b/.gitignore index bdf1e1b..a63d836 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ .bender scripts/compile.tcl -models/s27ks0641 \ No newline at end of file +axi_log/ +work/ +transcript +modelsim.ini +vsim.wlf +models/s27ks0641 diff --git a/Bender.yml b/Bender.yml index 905e54d..e8af3d2 100644 --- a/Bender.yml +++ b/Bender.yml @@ -34,11 +34,16 @@ sources: - src/hyperbus_phy2r.sv - src/hyperbus_ddr_out.sv - src/hyperbus_delay.sv + - src/hyperbus_rwds_sampler.sv - src/hyperbus_trx.sv - src/hyperbus_cfg_regs.sv - src/hyperbus_phy.sv - src/hyperbus_phy_if.sv - src/hyperbus_axi.sv + - target: xilinx + files: + - target/xilinx/hyperbus_clk_delay.sv + - target/xilinx/hyperbus_rwds_delay.sv - target: hyper_test files: # Device models. TODO: extend @@ -47,5 +52,6 @@ sources: - test/fixture_hyperbus.sv - test/hyperbus_tb.sv - test/dut_if.sv + - test/hyperbus_tb_pkg.sv - test/axi_hyper_tb.sv - src/hyperbus.sv diff --git a/models/configurable_delay.behav.sv b/models/configurable_delay.behav.sv index f000ede..bc445ab 100644 --- a/models/configurable_delay.behav.sv +++ b/models/configurable_delay.behav.sv @@ -13,8 +13,6 @@ // Automatically generated by the Generic Delay generator. `timescale 1ps/1ps -(* no_ungroup *) -(* no_boundary_optimization *) module configurable_delay #( parameter int unsigned NUM_STEPS, // The desired number of delay taps. Must be // a power of 2. Don't use very large values @@ -23,17 +21,18 @@ module configurable_delay #( // will not work. 
localparam DELAY_SEL_WIDTH = $clog2(NUM_STEPS) ) ( - input logic clk_i, - input logic enable_i, + input logic clk_i, input logic [DELAY_SEL_WIDTH-1:0] delay_i, output logic clk_o ); - logic enable_latched; logic clk; assign clk = clk_i; - always @(clk) clk_o <= #(real'(delay_i)*3.750ns/15) clk; + // The standard delay line is expected to have 32 taps with ~78ps per tap + // This conforms to the Xilinx IDELAYE2 with a 200MHz reference clock + // The total delay range is thus ~2.5ns + always @(clk) clk_o <= #(real'(delay_i)*78ps + 10ps) clk; endmodule diff --git a/models/configurable_delay.fpga.sv b/models/configurable_delay.fpga.sv index 11be16c..2b6bcfd 100644 --- a/models/configurable_delay.fpga.sv +++ b/models/configurable_delay.fpga.sv @@ -11,8 +11,6 @@ `timescale 1ps/1ps -(* no_ungroup *) -(* no_boundary_optimization *) module configurable_delay #( parameter int unsigned NUM_STEPS, // The desired number of delay taps. Must be // a power of 2. Don't use very large values diff --git a/src/hyperbus.sv b/src/hyperbus.sv index cf7212d..da287fa 100644 --- a/src/hyperbus.sv +++ b/src/hyperbus.sv @@ -7,37 +7,39 @@ // Luca Valente module hyperbus #( - parameter int unsigned NumChips = -1, - parameter int unsigned NumPhys = 2, - parameter int unsigned IsClockODelayed = 0, - parameter int unsigned AxiAddrWidth = -1, - parameter int unsigned AxiDataWidth = -1, - parameter int unsigned AxiIdWidth = -1, - parameter int unsigned AxiUserWidth = -1, - parameter type axi_req_t = logic, - parameter type axi_rsp_t = logic, - parameter type axi_w_chan_t = logic, - parameter type axi_b_chan_t = logic, - parameter type axi_ar_chan_t = logic, - parameter type axi_r_chan_t = logic, - parameter type axi_aw_chan_t = logic, - parameter int unsigned RegAddrWidth = -1, - parameter int unsigned RegDataWidth = -1, + parameter int unsigned NumChips = -1, + parameter int unsigned NumPhys = 2, + parameter bit UsePhyClkDivider = 1, + parameter int unsigned AxiAddrWidth = -1, + parameter int 
unsigned AxiDataWidth = -1, + parameter int unsigned AxiIdWidth = -1, + parameter int unsigned AxiUserWidth = -1, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic, + parameter type axi_w_chan_t = logic, + parameter type axi_b_chan_t = logic, + parameter type axi_ar_chan_t = logic, + parameter type axi_r_chan_t = logic, + parameter type axi_aw_chan_t = logic, + parameter int unsigned RegAddrWidth = -1, + parameter int unsigned RegDataWidth = -1, parameter int unsigned MinFreqMHz = 100, - parameter type reg_req_t = logic, - parameter type reg_rsp_t = logic, - parameter type axi_rule_t = logic, + parameter type reg_req_t = logic, + parameter type reg_rsp_t = logic, + parameter type axi_rule_t = logic, // The below have sensible defaults, but should be set on integration! - parameter int unsigned RxFifoLogDepth = 2, - parameter int unsigned TxFifoLogDepth = 2, + parameter int unsigned RxFifoLogDepth = 3, + parameter int unsigned TxFifoLogDepth = 3, parameter logic [RegDataWidth-1:0] RstChipBase = 'h0, // Base address for all chips parameter logic [RegDataWidth-1:0] RstChipSpace = 'h1_0000, // 64 KiB: Current maximum HyperBus device size parameter hyperbus_pkg::hyper_cfg_t RstCfg = hyperbus_pkg::gen_RstCfg(NumPhys,MinFreqMHz), parameter int unsigned PhyStartupCycles = 300 * 200, /* us*MHz */ // Conservative maximum frequency estimate - parameter int unsigned AxiLogDepth = 3, parameter int unsigned SyncStages = 2 ) ( input logic clk_phy_i, +`ifdef TARGET_XILINX + input logic clk_ref200_i, // only used for Xilinx delay lines +`endif input logic rst_phy_ni, input logic clk_sys_i, input logic rst_sys_ni, @@ -80,7 +82,7 @@ module hyperbus #( } tf_cdc_t; - logic clk_phy_i_0, clk_phy_i_90, rst_phy; + logic clk_phy_0, clk_phy_90, rst_phy; // Register file hyperbus_pkg::hyper_cfg_t cfg; @@ -179,18 +181,37 @@ module hyperbus #( .trans_active_o ( trans_active ) ); + if(UsePhyClkDivider == 1'b1) begin : clock_generator + hyperbus_clk_gen ddr_clk ( + .clk_i ( 
clk_phy_i ), + .rst_ni ( rst_phy_ni ), + .clk0_o ( clk_phy_0 ), + .clk90_o ( clk_phy_90 ), + .clk180_o ( ), + .clk270_o ( ), + .rst_no ( rst_phy ) + ); + end else begin + assign clk_phy_0 = clk_phy_i; + assign clk_phy_90 = '0; + assign rst_phy = rst_phy_ni; + end + hyperbus_phy_if #( - .IsClockODelayed( IsClockODelayed ), - .NumChips ( NumChips ), - .StartupCycles ( PhyStartupCycles ), - .NumPhys ( NumPhys ), - .hyper_rx_t ( hyper_rx_t ), - .hyper_tx_t ( hyper_tx_t ), - .SyncStages ( SyncStages ) + .UsePhyClkDivider ( UsePhyClkDivider ), + .NumChips ( NumChips ), + .NumPhys ( NumPhys ), + .StartupCycles ( PhyStartupCycles ), + .hyper_rx_t ( hyper_rx_t ), + .hyper_tx_t ( hyper_tx_t ), + .SyncStages ( SyncStages ) ) i_phy ( - .clk_i ( clk_phy_i_0 ), - .clk_i_90 ( clk_phy_i_90 ), - .rst_ni ( rst_phy ), + .clk_phy_i ( clk_phy_0 ), + .clk_phy_i_90 ( clk_phy_90 ), +`ifdef TARGET_XILINX + .clk_ref200_i ( clk_ref200_i ), +`endif + .rst_phy_ni ( rst_phy ), .test_mode_i ( test_mode_i ), .cfg_i ( cfg ), @@ -231,7 +252,7 @@ module hyperbus #( .src_ready_o ( axi_trans_ready ), .dst_rst_ni ( rst_phy ), - .dst_clk_i ( clk_phy_i_0 ), + .dst_clk_i ( clk_phy_0 ), .dst_data_o ( phy_tf_cdc ), .dst_valid_o ( phy_trans_valid ), .dst_ready_i ( phy_trans_ready ) @@ -241,7 +262,7 @@ module hyperbus #( .T ( logic ) ) i_cdc_2phase_b ( .src_rst_ni ( rst_phy ), - .src_clk_i ( clk_phy_i_0 ), + .src_clk_i ( clk_phy_0 ), .src_data_i ( phy_b_error ), .src_valid_i ( phy_b_valid ), .src_ready_o ( phy_b_ready ), @@ -265,7 +286,7 @@ module hyperbus #( .src_ready_o ( axi_tx_ready ), .dst_rst_ni ( rst_phy ), - .dst_clk_i ( clk_phy_i_0 ), + .dst_clk_i ( clk_phy_0 ), .dst_data_o ( phy_tx ), .dst_valid_o ( phy_tx_valid ), .dst_ready_i ( phy_tx_ready ) @@ -277,7 +298,7 @@ module hyperbus #( .LOG_DEPTH ( RxFifoLogDepth ) ) i_cdc_fifo_rx ( .src_rst_ni ( rst_phy ), - .src_clk_i ( clk_phy_i_0 ), + .src_clk_i ( clk_phy_0 ), .src_data_i ( phy_rx ), .src_valid_i ( phy_rx_valid ), .src_ready_o ( phy_rx_ready ), @@ 
-289,27 +310,5 @@ module hyperbus #( .dst_ready_i ( axi_rx_ready ) ); - // Shift clock by 90 degrees - generate - if(IsClockODelayed==0) begin : clock_generator - hyperbus_clk_gen ddr_clk ( - .clk_i ( clk_phy_i ), - .rst_ni ( rst_phy_ni ), - .clk0_o ( clk_phy_i_0 ), - .clk90_o ( clk_phy_i_90 ), - .clk180_o ( ), - .clk270_o ( ), - .rst_no ( rst_phy ) - ); - end else if (IsClockODelayed==1) begin - assign clk_phy_i_0 = clk_phy_i; - assign rst_phy = rst_phy_ni; - hyperbus_delay i_delay_tx_clk_90 ( - .in_i ( clk_phy_i_0 ), - .delay_i ( cfg.t_tx_clk_delay ), - .out_o ( clk_phy_i_90 ) - ); - end - endgenerate endmodule : hyperbus diff --git a/src/hyperbus_axi.sv b/src/hyperbus_axi.sv index ca78ed2..a54012b 100644 --- a/src/hyperbus_axi.sv +++ b/src/hyperbus_axi.sv @@ -170,7 +170,7 @@ module hyperbus_axi #( // ============================ axi_fifo #( - .Depth ( 4 ), + .Depth ( 8 ), .FallThrough ( 1'b0 ), .aw_chan_t ( axi_fifo_aw_chan_t ), .w_chan_t ( axi_fifo_w_chan_t ), @@ -420,7 +420,7 @@ module hyperbus_axi #( stream_fifo #( .FALL_THROUGH ( 1'b0 ), .T ( axi_w_chan_t ), - .DEPTH ( 8 ) + .DEPTH ( 16 ) ) wchan_stream_fifo ( .clk_i, .rst_ni, diff --git a/src/hyperbus_cfg_regs.sv b/src/hyperbus_cfg_regs.sv index d438007..bafb747 100644 --- a/src/hyperbus_cfg_regs.sv +++ b/src/hyperbus_cfg_regs.sv @@ -30,10 +30,10 @@ module hyperbus_cfg_regs #( `include "common_cells/registers.svh" // Internal Parameters - localparam int unsigned NumBaseRegs = 11; + localparam int unsigned NumBaseRegs = 13; localparam int unsigned NumRegs = 2*NumChips + NumBaseRegs; localparam int unsigned RegsBits = cf_math_pkg::idx_width(NumRegs); - localparam int unsigned RegStrbWidth = RegDataWidth/8; // TODO ASSERT: Must be power of two >= 16!! 
+ localparam int unsigned RegStrbWidth = RegDataWidth/8; // Data and index types typedef logic [RegsBits-1:0] reg_idx_t; @@ -59,6 +59,8 @@ module hyperbus_cfg_regs #( if (sel_reg_mapped) begin rfield = { crange_q, + reg_data_t'(cfg_q.rwds_sample), + reg_data_t'(cfg_q.csn_to_ck_cycles), reg_data_t'(cfg_q.t_csh_cycles), reg_data_t'(cfg_q.which_phy), reg_data_t'(cfg_q.phys_in_use), @@ -99,6 +101,8 @@ module hyperbus_cfg_regs #( 'h8: cfg_d.phys_in_use = (NumPhys==1) ? 0 : ( (~wmask & cfg_q.phys_in_use ) | (wmask & reg_req_i.wdata) ); 'h9: cfg_d.which_phy = (NumPhys==1) ? 0 : ( (~wmask & cfg_q.which_phy ) | (wmask & reg_req_i.wdata) ); 'ha: cfg_d.t_csh_cycles = (~wmask & cfg_q.t_csh_cycles ) | (wmask & reg_req_i.wdata); + 'hb: cfg_d.csn_to_ck_cycles = (~wmask & cfg_q.csn_to_ck_cycles ) | (wmask & reg_req_i.wdata); + 'hc: cfg_d.rwds_sample = (~wmask & cfg_q.rwds_sample ) | (wmask & reg_req_i.wdata); default: begin {sel_chip, chip_reg} = sel_reg - NumBaseRegs; crange_d[sel_chip][chip_reg] = (~wmask & crange_q[sel_chip][chip_reg]) | (wmask & reg_req_i.wdata); diff --git a/src/hyperbus_clk_gen.sv b/src/hyperbus_clk_gen.sv index 546ca86..26400c8 100644 --- a/src/hyperbus_clk_gen.sv +++ b/src/hyperbus_clk_gen.sv @@ -5,6 +5,9 @@ // Hayate Okuhara /// Generates 4 phase shifted clocks out of one faster clock +(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) module hyperbus_clk_gen ( input logic clk_i, // input clock input logic rst_ni, diff --git a/src/hyperbus_clock_diff_out.sv b/src/hyperbus_clock_diff_out.sv index a73ddde..67c6ae6 100644 --- a/src/hyperbus_clock_diff_out.sv +++ b/src/hyperbus_clock_diff_out.sv @@ -6,6 +6,9 @@ // Stephan Keck /// A Hyperbus differential clock output generator. 
+(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) module hyperbus_clock_diff_out ( input logic in_i, diff --git a/src/hyperbus_delay.sv b/src/hyperbus_delay.sv index e7b8853..a441a3d 100644 --- a/src/hyperbus_delay.sv +++ b/src/hyperbus_delay.sv @@ -5,19 +5,22 @@ // Thomas Benz // Paul Scheffler +(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) module hyperbus_delay ( input logic in_i, - input logic [3:0] delay_i, + input logic [4:0] delay_i, output logic out_o ); + // The standard delay line is expected to have 32 taps with ~78ps per tap + // This conforms to the Xilinx IDELAYE2 with a 200MHz reference clock + // The total delay range is thus ~2.5ns configurable_delay #( - .NUM_STEPS(16) + .NUM_STEPS(32) ) i_delay ( .clk_i ( in_i ), - `ifndef TARGET_ASIC - .enable_i ( 1'b1 ), - `endif .delay_i ( delay_i ), .clk_o ( out_o ) ); diff --git a/src/hyperbus_phy.sv b/src/hyperbus_phy.sv index 78ed27d..1099f00 100644 --- a/src/hyperbus_phy.sv +++ b/src/hyperbus_phy.sv @@ -8,7 +8,6 @@ // Paul Scheffler module hyperbus_phy import hyperbus_pkg::*; #( - parameter int unsigned IsClockODelayed = -1, parameter int unsigned NumChips = 2, parameter int unsigned NumPhys = -1, parameter int unsigned TimerWidth = 16, @@ -98,7 +97,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( logic trx_clk_ena; logic trx_cs_ena; logic trx_rwds_sample; - logic trx_rwds_sample_ena; logic [15:0] trx_tx_data; logic trx_tx_data_oe; logic [1:0] trx_tx_rwds; @@ -114,7 +112,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( // ================= hyperbus_trx #( - .IsClockODelayed( IsClockODelayed ), .NumChips ( NumChips ), .RxFifoLogDepth ( RxFifoLogDepth ), .SyncStages ( SyncStages ) @@ -123,22 +120,23 @@ module hyperbus_phy import hyperbus_pkg::*; #( .clk_i_90, .rst_ni, .test_mode_i, - .cs_i ( cs_q ), - .cs_ena_i ( trx_cs_ena ), - .rwds_sample_o ( trx_rwds_sample ), - .rwds_sample_ena_i ( trx_rwds_sample_ena ), - .tx_clk_delay_i ( 
cfg_i.t_tx_clk_delay ), - .tx_clk_ena_i ( trx_clk_ena ), - .tx_data_i ( trx_tx_data ), - .tx_data_oe_i ( trx_tx_data_oe ), - .tx_rwds_i ( trx_tx_rwds ), - .tx_rwds_oe_i ( trx_tx_rwds_oe ), - .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), - .rx_clk_set_i ( trx_rx_clk_set ), - .rx_clk_reset_i ( trx_rx_clk_reset ), - .rx_data_o ( trx_rx_data ), - .rx_valid_o ( trx_rx_valid ), - .rx_ready_i ( trx_rx_ready ), + .cfg_edge_idx_i ( cfg_i.rwds_sample.cylce_idx ), + .cfg_edge_pol_i ( cfg_i.rwds_sample.polarity ), + .cs_i ( cs_q ), + .cs_ena_i ( trx_cs_ena ), + .rwds_sample_o ( trx_rwds_sample ), + .tx_clk_delay_i ( cfg_i.t_tx_clk_delay ), + .tx_clk_ena_i ( trx_clk_ena ), + .tx_data_i ( trx_tx_data ), + .tx_data_oe_i ( trx_tx_data_oe ), + .tx_rwds_i ( trx_tx_rwds ), + .tx_rwds_oe_i ( trx_tx_rwds_oe ), + .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), + .rx_clk_set_i ( trx_rx_clk_set ), + .rx_clk_reset_i ( trx_rx_clk_reset ), + .rx_data_o ( trx_rx_data ), + .rx_valid_o ( trx_rx_valid ), + .rx_ready_i ( trx_rx_ready ), .hyper_cs_no, .hyper_ck_o, .hyper_ck_no, @@ -243,7 +241,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( trx_cs_ena = 1'b1; trx_clk_ena = 1'b0; trx_rx_clk_set = 1'b0; - trx_rwds_sample_ena = 1'b0; // Default next state state_d = state_q; timer_d = timer_q - 1; @@ -269,26 +266,42 @@ module hyperbus_phy import hyperbus_pkg::*; #( if (trans_valid_i & ~b_pending_q & r_outstand_q == '0) begin tf_d = trans_i; cs_d = trans_cs_i; - // Send 3 CA words (t_CSS respected through clock delay) - timer_d = 2; - state_d = SendCA; - // Enable output driver (needs to be enabled one cycle - // earlier since tri-state enables of IO pads are quite - // slow compared to the data pins) + + if(cfg_i.csn_to_ck_cycles != 0) begin + // assert CS but delay hyper_ck to allow more time + // for memory to drive RWDS (to satisfy t_DSV) + state_d = DelayCK; + timer_d = cfg_i.csn_to_ck_cycles -1; + end else begin + // max throughput when memory RWDS signal arrives early + state_d = SendCA; + // Send 3 CA 
words (t_CSS respected through clock delay) + timer_d = 2; + end + + // Enable output driver (needs to be enabled at least + // one cycle earlier since tri-state enables of IO pads + // are quite slow compared to the data pins) trx_tx_data_oe = 1'b1; end end + DelayCK: begin + trx_clk_ena = 1'b0; + if (ctl_timer_zero) begin + timer_d = 2; // Send 3 CA words + state_d = SendCA; + end + end SendCA: begin // Dataflow handled outside FSM trx_clk_ena = 1'b1; trx_tx_data_oe = 1'b1; - trx_rwds_sample_ena = ~ctl_write_zero_lat; if (ctl_timer_zero) begin if (ctl_write_zero_lat) begin timer_d = cfg_i.t_burst_max; state_d = Write; end else begin - timer_d = TimerWidth'(cfg_i.t_latency_access) << ctl_add_latency; + timer_d = TimerWidth'(cfg_i.t_latency_access); state_d = WaitLatAccess; end end @@ -296,18 +309,44 @@ module hyperbus_phy import hyperbus_pkg::*; #( WaitLatAccess: begin trx_clk_ena = 1'b1; trx_tx_data_oe = 1'b1; - // Substract cycle for last CA and another for state delay + // ctl_add_latency may arrive at any time (adjustable RWDS sampling) + // If no additional latency required: + if (~ctl_add_latency) begin + // Substract cycle for last CA and another for state delay + if(ctl_timer_two) begin + timer_d = cfg_i.t_burst_max; + // Switch to write or read phase and already start + // turnaround of tri-state driver (depending on latency + // config and if read or write transaction). + if (tf_q.write) begin + state_d = Write; + trx_tx_data_oe = 1'b1; + // For zero latency writes, we must not drive the RWDS + // signal (see specs page 9). Depending on the latency + // mode we thus drive only the DQ signals or DQ + RWDS. 
+ trx_tx_rwds_oe = ~ctl_write_zero_lat; + end else begin + state_d = Read; + trx_tx_data_oe = 1'b0; + trx_tx_rwds_oe = 1'b0; + end + end + end else if (ctl_timer_one) begin + // instead of going to 0, add another latency count + state_d = WaitAddLatAccess; + timer_d = TimerWidth'(cfg_i.t_latency_access); + end + end + WaitAddLatAccess: begin + // Same as WaitLatAccess but without possibility + // of adding another latency count + trx_clk_ena = 1'b1; + trx_tx_data_oe = 1'b1; if (ctl_timer_two) begin timer_d = cfg_i.t_burst_max; - // Switch to write or read phase and already start - // turnaround of tri-state driver (depending on latency - // config and if read or write transaction). if (tf_q.write) begin state_d = Write; trx_tx_data_oe = 1'b1; - // For zero latency writes, we must not drive the RWDS - // signal (see specs page 9). Depending on the latency - // mode we thus drive only the DQ signals or DQ + RWDS. trx_tx_rwds_oe = ~ctl_write_zero_lat; end else begin state_d = Read; diff --git a/src/hyperbus_phy_if.sv b/src/hyperbus_phy_if.sv index 3203b63..e1d888e 100644 --- a/src/hyperbus_phy_if.sv +++ b/src/hyperbus_phy_if.sv @@ -3,21 +3,23 @@ // SPDX-License-Identifier: SHL-0.51 // // Luca Valente +// Philippe Sauter module hyperbus_phy_if import hyperbus_pkg::*; #( - parameter int unsigned IsClockODelayed = 1, + parameter bit UsePhyClkDivider = 1, parameter int unsigned NumChips = 2, parameter int unsigned NumPhys = 2, - parameter int unsigned TimerWidth = 16, - parameter int unsigned RxFifoLogDepth = 3, parameter int unsigned StartupCycles = 60000, /*MHz*/ // Conservative maximum frequency estimate - parameter int unsigned SyncStages = 2, + parameter int unsigned SyncStages = 2, parameter type hyper_tx_t = logic, parameter type hyper_rx_t = logic )( - input logic clk_i, - input logic clk_i_90, - input logic rst_ni, + input logic clk_phy_i, + input logic clk_phy_i_90, // only used together with divided clock (clk_gen) +`ifdef TARGET_XILINX + input logic 
clk_ref200_i, // only used with Xilinx delay lines (reference to IDELAY cells) +`endif + input logic rst_phy_ni, input logic test_mode_i, // Config registers input hyper_cfg_t cfg_i, @@ -39,7 +41,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( input logic b_ready_i, output logic b_error_o, - // Physical interace: facing HyperBus + // Physical interface: facing HyperBus output logic [NumPhys-1:0][NumChips-1:0] hyper_cs_no, output logic [NumPhys-1:0] hyper_ck_o, output logic [NumPhys-1:0] hyper_ck_no, @@ -52,196 +54,218 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( output logic [NumPhys-1:0] hyper_reset_no ); - phy_rx_t [NumPhys-1:0] phy_fifo_rx; - phy_rx_t [NumPhys-1:0] fifo_axi_rx; - logic [NumPhys-1:0] phy_fifo_valid; - logic [NumPhys-1:0] phy_fifo_ready; - logic [NumPhys-1:0] fifo_axi_valid; - logic fifo_axi_ready; + logic clk_phy_0, clk_phy_90; - logic [NumPhys-1:0][1:0] fifo_axi_usage; + // Shift clock by 90 degrees + if(UsePhyClkDivider == '0) begin : clock_generator + assign clk_phy_0 = clk_phy_i; - logic tx_both_ready, ts_both_ready; - logic rx_both_valid, b_both_valid; + `ifdef TARGET_XILINX + hyperbus_clk_delay i_delay_tx_clk_90 ( + .rst_i ( ~rst_phy_ni ), // active-high reset from the PHY reset port (this module has no rst_ni) + .clk_ref200_i, + .clk_i ( clk_phy_i ), + .in_i ( clk_phy_0 ), + .delay_i ( cfg_i.t_tx_clk_delay ), + .out_o ( clk_phy_90 ) + ); + `else + hyperbus_delay i_delay_tx_clk_90 ( + .in_i ( clk_phy_0 ), + .delay_i ( cfg_i.t_tx_clk_delay ), + .out_o ( clk_phy_90 ) + ); + `endif + end else begin + assign clk_phy_0 = clk_phy_i; + assign clk_phy_90 = clk_phy_i_90; + end + + + phy_rx_t [NumPhys-1:0] phy_fifo_rx; + phy_rx_t [NumPhys-1:0] fifo_axi_rx; + logic [NumPhys-1:0] phy_fifo_valid; + logic [NumPhys-1:0] phy_fifo_ready; + logic [NumPhys-1:0] fifo_axi_valid; + logic fifo_axi_ready; - logic [NumPhys-1:0] phy_tx_ready; - logic phy_tx_valid; + logic [NumPhys-1:0][1:0] fifo_axi_usage; - logic [NumPhys-1:0] phy_trans_ready; - logic [NumPhys-1:0] phy_trans_valid; + logic tx_both_ready, ts_both_ready; + 
logic rx_both_valid, b_both_valid; - logic [NumPhys-1:0] phy_b_valid; - logic [NumPhys-1:0] phy_b_error; - logic phy_b_ready; + logic [NumPhys-1:0] phy_tx_ready; + logic phy_tx_valid; - genvar i; - generate + logic [NumPhys-1:0] phy_trans_ready; + logic [NumPhys-1:0] phy_trans_valid; - if (NumPhys==2) begin : phy_wrap + logic [NumPhys-1:0] phy_b_valid; + logic [NumPhys-1:0] phy_b_error; + logic phy_b_ready; - logic [NumPhys-1:0] phy_enable; - logic [NumPhys-1:0] phy_busy; - logic [NumPhys-1:0] phy_active_q, phy_active_d; - logic change_phy_active; + if (NumPhys==2) begin : phy_wrap - assign change_phy_active = phy_active_q != phy_enable; - assign phy_enable = cfg_i.phys_in_use ? '1 : (1 << cfg_i.which_phy); - assign phy_active_d = change_phy_active && fifo_axi_usage == '0 ? - phy_enable | phy_busy : phy_active_q; + logic [NumPhys-1:0] phy_enable; + logic [NumPhys-1:0] phy_busy; + logic [NumPhys-1:0] phy_active_q, phy_active_d; + logic change_phy_active; - always_ff @(posedge clk_i or negedge rst_ni ) begin - if (!rst_ni) begin - phy_active_q <= '1; - end else begin - phy_active_q <= phy_active_d; - end + assign change_phy_active = phy_active_q != phy_enable; + assign phy_enable = cfg_i.phys_in_use ? '1 : (1 << cfg_i.which_phy); + assign phy_active_d = change_phy_active && fifo_axi_usage == '0 ? 
+ phy_enable | phy_busy : phy_active_q; + + always_ff @(posedge clk_phy_0 or negedge rst_phy_ni ) begin + if (!rst_phy_ni) begin + phy_active_q <= '1; + end else begin + phy_active_q <= phy_active_d; end + end + + assign rx_both_valid = & (fifo_axi_valid | ~phy_active_q); + assign rx_valid_o = rx_both_valid; + assign fifo_axi_ready = rx_ready_i && rx_both_valid; + + assign rx_o.error = | ({fifo_axi_rx[1].error, fifo_axi_rx[0].error} & phy_active_q); + assign rx_o.last = & ({fifo_axi_rx[1].last, fifo_axi_rx[0].last} | ~phy_active_q); + assign tx_both_ready = & (phy_tx_ready | ~phy_active_q); + assign tx_ready_o = tx_both_ready; + assign phy_tx_valid = tx_both_ready && tx_valid_i; + + assign b_both_valid = & (phy_b_valid | ~phy_active_q); + assign b_valid_o = b_both_valid; + assign phy_b_ready = b_ready_i && b_both_valid; + assign b_error_o = | (phy_b_error & phy_active_q); + + assign ts_both_ready = change_phy_active ? '0 : + & (phy_trans_ready | ~phy_active_q); + assign trans_ready_o = ts_both_ready; + assign phy_trans_valid = change_phy_active ? '0 : + phy_trans_ready & {NumPhys{trans_valid_i}} & phy_active_q; + + for ( genvar i=0; i first edge after CS_N is a falling edge }; return cfg; diff --git a/src/hyperbus_rwds_sampler.sv b/src/hyperbus_rwds_sampler.sv new file mode 100644 index 0000000..41da53f --- /dev/null +++ b/src/hyperbus_rwds_sampler.sv @@ -0,0 +1,108 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Philippe Sauter + +// This modules guarantees proper worst-case sampling of RWDS. +// RWDS may only be valid (and stable) for a single period around +// the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). +// Since there may be arbitrary pad and PCBs delays, the sampling edge +// is fully configurable (edge number and polarity). 
+// A gated clock that is only active around one edge is created +// and then the sample is taken only at the selected edge. +// The final sample is saved into a register in the phy clock domain. +// +// It is not possible to sample on the very first rising clock edge +// with this mechanism. +// Therefore cfg_edge_idx_i = 0 selects the first falling edge +// or the subsequent rising edge, depending on cfg_edge_pol_i. +// With this naming scheme, the default edge should be idx=1, pol=1. +// +// Constraints: +// cfg* signals are pseudostatic (set_false_path -setup or set_multicycle_path) + +`include "common_cells/registers.svh" + +(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) +module hyperbus_rwds_sampler import hyperbus_pkg::*; #() +( + // Global signals + input logic clk_i, // phy clock + input logic rst_ni, + input logic test_mode_i, + + input logic [3:0] cfg_edge_idx_i, // #edge where rwds is sampled + input logic cfg_edge_pol_i, // 1: rising, 0: falling + + // sampled value going to PHY-FSM + output logic rwds_sample_o, + + // physical HyperBus signals + input logic hyper_cs_ni, + input logic hyper_rwds_i +); + + // used to time the sampling of RWDS to determine additional latency + logic tx_clk_180; // inverted clock + logic [4:0] cnt_edge_d, cnt_edge_q; // one bit larger than config + logic cnt_clk; // clock used for edge counting + logic sampling_clk, sampling_clk_gated; // clock used for sampling + logic enable_sampling; // sampling clock gate enable + logic rwds_sample; + + // generate and select clocks + // Sampling is either clocked by un-inverted or inverted 90deg hyperbus clock + // Counter is clocked by the inverse as it controls the clock gate + // which should be on for one cycle with sampling edge in the middle + tc_clk_inverter i_tx_clk_inv ( + .clk_i ( clk_i ), + .clk_o ( tx_clk_180 ) + ); + + tc_clk_mux2 i_sampling_clk_mux ( + .clk0_i ( tx_clk_180 ), + .clk1_i ( clk_i ), + .clk_sel_i ( cfg_edge_pol_i ), + .clk_o 
( sampling_clk ) + ); + + tc_clk_inverter i_edge_cnt_clk_inv ( + .clk_i ( sampling_clk ), + .clk_o ( cnt_clk ) + ); + + always_comb begin : gen_edge_cnt + // only count during transfers + if(~hyper_cs_ni) begin + cnt_edge_d = cnt_edge_q +1; + if(cnt_edge_q == '1) begin + cnt_edge_d = cnt_edge_q; // saturating counter + end + end else begin + // reset counter for next transfer + cnt_edge_d = 1'b0; + end + end + + `FF(cnt_edge_q, cnt_edge_d, '0, cnt_clk); + + assign enable_sampling = (cnt_edge_q == cfg_edge_idx_i) & ~hyper_cs_ni; + + // gate the sampling of rwds to the selected clock edge + tc_clk_gating i_rwds_sample_rise_gate ( + .clk_i ( sampling_clk ), + .en_i ( enable_sampling ), + .test_en_i ( test_mode_i ), + .clk_o ( sampling_clk_gated ) + ); + + // sample rwds exactly once using gated clock + `FF(rwds_sample, hyper_rwds_i, '0, sampling_clk_gated); + + // pass rwds to phy-clock domain + `FF(rwds_sample_o, rwds_sample, '0, clk_i); + +endmodule diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index b442b10..36592ff 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -7,36 +7,38 @@ // Stephan Keck module hyperbus_trx #( - parameter int unsigned IsClockODelayed = -1, parameter int unsigned NumChips = 2, parameter int unsigned RxFifoLogDepth = 3, parameter int unsigned SyncStages = 2 )( // Global signals - input logic clk_i, - input logic clk_i_90, - input logic rst_ni, - input logic test_mode_i, - // Transciever control: facing controller + input logic clk_i, + input logic clk_i_90, + input logic rst_ni, + input logic test_mode_i, + + input logic [3:0] cfg_edge_idx_i, + input logic cfg_edge_pol_i, + + // Transceiver control: facing controller input logic [NumChips-1:0] cs_i, input logic cs_ena_i, output logic rwds_sample_o, - input logic rwds_sample_ena_i, - input logic [3:0] tx_clk_delay_i, + input logic [4:0] tx_clk_delay_i, input logic tx_clk_ena_i, input logic [15:0] tx_data_i, input logic tx_data_oe_i, input logic [1:0] tx_rwds_i, input logic 
tx_rwds_oe_i, - input logic [3:0] rx_clk_delay_i, + input logic [4:0] rx_clk_delay_i, input logic rx_clk_set_i, input logic rx_clk_reset_i, output logic [15:0] rx_data_o, output logic rx_valid_o, input logic rx_ready_i, - // Physical interace: facing HyperBus + // Physical interface: facing HyperBus output logic [NumChips-1:0] hyper_cs_no, output logic hyper_ck_o, output logic hyper_ck_no, @@ -76,6 +78,8 @@ module hyperbus_trx #( assign tx_clk_90 = clk_i_90; // 90deg-shifted differential output clock, sampling output bytes centrally + // TODO: tx_clk_ena_q to tx_clk_90 may need a constraint at the pins of this module + // specifically tx_clk_ena_q must arrive BEFORE tx_clk_90 otherwise the gating may fail hyperbus_clock_diff_out i_clock_diff_out ( .in_i ( tx_clk_90 ), .en_i ( tx_clk_ena_q ), @@ -84,7 +88,7 @@ module hyperbus_trx #( ); // Synchronize output chip select to shifted differential output clock - always_ff @(posedge tx_clk_90 or negedge rst_ni) begin : proc_ff_tx_shift90 + always_ff @(negedge clk_i or negedge rst_ni) begin : proc_ff_tx_shift90 if (~rst_ni) hyper_cs_no <= '1; else hyper_cs_no <= cs_ena_i ? 
~cs_i : '1; end @@ -127,16 +131,22 @@ module hyperbus_trx #( end end - // Sample RWDS on demand for extra latency determination - always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rwds_sample - if (~rst_ni) rwds_sample_o <= '0; - else if (rwds_sample_ena_i) rwds_sample_o <= hyper_rwds_i; - end - // ======== // RX // ======== + // sample RWDS for extra latency determination (adjustable sampling edge) + hyperbus_rwds_sampler i_rwds_sampler ( + .clk_i, + .rst_ni, + .test_mode_i, + .cfg_edge_idx_i, + .cfg_edge_pol_i, + .rwds_sample_o, + .hyper_cs_ni ( &hyper_cs_no ), + .hyper_rwds_i ( hyper_rwds_i ) + ); + // Set and Reset RX clock enable always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rx_delay if (~rst_ni) rx_rwds_clk_ena <= 1'b0; @@ -145,13 +155,25 @@ module hyperbus_trx #( end // Shift RWDS clock by 90 degrees - hyperbus_delay i_delay_rx_rwds_90 ( - .in_i ( hyper_rwds_i ), - .delay_i ( rx_clk_delay_i ), - .out_o ( rx_rwds_90 ) - ); +`ifdef TARGET_XILINX + hyperbus_rwds_delay i_delay_rx_rwds_90 ( + .rst_i ( ~rst_ni ), + .clk_i, + .in_i ( hyper_rwds_i ), + .delay_i ( rx_clk_delay_i ), + .out_o ( rx_rwds_90 ) + ); + `else + hyperbus_delay i_delay_rx_rwds_90 ( + .in_i ( hyper_rwds_i ), + .delay_i ( rx_clk_delay_i ), + .out_o ( rx_rwds_90 ) + ); +`endif // Gate delayed RWDS clock with RX clock enable + (* no_boundary_optimization *) + (* keep_hierarchy = "yes" *) tc_clk_gating i_rwds_in_clk_gate ( .clk_i ( rx_rwds_90 ), .en_i ( rx_rwds_clk_ena ), diff --git a/target/xilinx/hyperbus_clk_delay.sv b/target/xilinx/hyperbus_clk_delay.sv new file mode 100644 index 0000000..c67957d --- /dev/null +++ b/target/xilinx/hyperbus_clk_delay.sv @@ -0,0 +1,83 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Philippe Sauter + +(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) +module hyperbus_clk_delay +( + input logic rst_i, + input logic clk_ref200_i, // 200 MHz reference clock + input logic clk_i, // control clock used to load delay_i + input logic in_i, + input logic [4:0] delay_i, + output logic out_o +); + + // "Xilinx 7 Series FPGA and Zynq-7000 All Programmable SoC Libraries Guide for HDL Designs" - page 192 + // Calibrates delay lines (IDELAYE2 and ODELAYE2) from ref clock (200 MHz) + IDELAYCTRL i_delayctrl + ( + .REFCLK ( clk_ref200_i ), + .RST ( rst_i ), + .RDY () + ); + + // "Xilinx 7 Series FPGA and Zynq-7000 All Programmable SoC Libraries Guide for HDL Designs" - page 330 + // Delay a signal going out to IOs or delay an internal clock (describes the ODELAYE2 kept below as a comment; the primitive actually instantiated is IDELAYE2) + // modes: + // - FIXED: constant delay value from param + // - VARIABLE: start with param value then increment/decrement + // - VAR_LOAD: dynamically load tap values + // - VAR_LOAD_PIPE: pipelines dynamic load + // ODELAYE2 #( + // .CINVCTRL_SEL ( "FALSE" ), // "TRUE" activates CINVCTRL functionality + // .DELAY_SRC ( "CLKIN" ), // source to delay chain ("CLKIN" or "ODATAIN") + // .HIGH_PERFORMANCE_MODE ( "TRUE" ), // "TRUE" for less jitter; "FALSE" for low power + // .ODELAY_TYPE ( "VAR_LOAD" ), // mode of operation, see above + // .ODELAY_VALUE ( 0 ), // delay value 0-31 (used in "VARIABLE" and "FIXED" mode) + // .PIPE_SEL ( "FALSE" ), // "TRUE" activates pipelined operation + // .REFCLK_FREQUENCY ( 200.0 ), // used for STA and simulation (190.0 - 310.0 MHz) + // .SIGNAL_PATTERN ( "CLOCK" ) // "DATA" or "CLOCK" depending on function, used in STA + // ) i_delay ( + // .REGRST ( rst_i ), // input: pipeline register reset (per UG471 only used in VAR_LOAD_PIPE mode) + // .C ( clk_i ), // input: control input clock + // .CLKIN ( in_i ), // input: clock to be delayed + // .ODATAIN ( 1'b0 ), // input: signal to be delayed driven by OSERDESE2 or output reg + // .DATAOUT (
out_o ), // output: delayed from ODATAIN (drives IO) or CLKIN (back into clock network) + // .CE ( 1'b0 ), // input: increment/decrement enable + // .CINVCTRL ( 1'b0 ), // input: switch clock polarity during operation (glitches!) + // .CNTVALUEIN ( delay_i ), // 5 bit input: delay tap + // .CNTVALUEOUT ( ), // 5 bit output: delay tap + // .INC ( 1'b0 ), // input: increment/decrement delay tap + // .LD ( 1'b0 ), // input: load ODELAY_VALUE param or CNTVALUEIN (depends on ODELAY_TYPE) + // .LDPIPEEN ( 1'b0 ) // input: enable the pipeline register to load data from LD + // ); + IDELAYE2 #( + .CINVCTRL_SEL ( "FALSE" ), // "TRUE" activates CINVCTRL functionality + .DELAY_SRC ( "DATAIN" ), // source to delay chain ("DATAIN" from fabric or "IDATAIN" from IO) + .HIGH_PERFORMANCE_MODE ( "TRUE" ), // "TRUE" for less jitter; "FALSE" for low power + .IDELAY_TYPE ( "VAR_LOAD" ), // mode of operation, see above + .IDELAY_VALUE ( 0 ), // delay value 0-31 (used in "VARIABLE" and "FIXED" mode) + .PIPE_SEL ( "FALSE" ), // "TRUE" activates pipelined operation + .REFCLK_FREQUENCY ( 200.0 ), // used for STA and simulation (190.0 - 310.0 MHz) + .SIGNAL_PATTERN ( "CLOCK" ) // "DATA" or "CLOCK" depending on function, used in STA + ) i_delay ( + .REGRST ( rst_i ), // input: pipeline register reset (per UG471 only used in VAR_LOAD_PIPE mode, so presumably without effect in VAR_LOAD - confirm) + .C ( clk_i ), // input: control input clock + .DATAIN ( in_i ), // input: signal from FPGA logic to be delayed + .IDATAIN ( 1'b0 ), // input: signal from IO to be delayed + .DATAOUT ( out_o ), // output: delayed from DATAIN or IDATAIN (drives ISERDESE2 or logic, not IO!) + .CE ( 1'b0 ), // input: increment/decrement enable + .CINVCTRL ( 1'b0 ), // input: switch clock polarity during operation (glitches!)
+ .CNTVALUEIN ( delay_i ), // 5 bit input: delay tap + .CNTVALUEOUT ( ), // 5 bit output: delay tap + .LD ( 1'b1 ), // input: load IDELAY_VALUE param or CNTVALUEIN (depends on IDELAY_TYPE); tied high, so in VAR_LOAD mode delay_i is presumably reloaded every clk_i cycle (confirm against UG471) + .INC ( 1'b0 ), // input: increment/decrement delay tap + .LDPIPEEN ( 1'b0 ) // input: enable the pipeline register to load data from LD + ); + +endmodule diff --git a/target/xilinx/hyperbus_rwds_delay.sv b/target/xilinx/hyperbus_rwds_delay.sv new file mode 100644 index 0000000..9bff254 --- /dev/null +++ b/target/xilinx/hyperbus_rwds_delay.sv @@ -0,0 +1,50 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Philippe Sauter + +(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) +module hyperbus_rwds_delay +( + input logic rst_i, + input logic in_i, + input logic clk_i, // control clock used to load delay_i + input logic [4:0] delay_i, + output logic out_o +); + + // "Xilinx 7 Series FPGA and Zynq-7000 All Programmable SoC Libraries Guide for HDL Designs" - page 194 + // Delay a signal coming in from IOs or delay an internal data signal (NOTE(review): no IDELAYCTRL is instantiated in this module; presumably calibration relies on one placed elsewhere - confirm) + // modes: + // - FIXED: constant delay value from param + // - VARIABLE: start with param value then increment/decrement + // - VAR_LOAD: dynamically load tap values + // - VAR_LOAD_PIPE: pipelines dynamic load + IDELAYE2 #( + .CINVCTRL_SEL ( "FALSE" ), // "TRUE" activates CINVCTRL functionality + .DELAY_SRC ( "DATAIN" ), // source to delay chain ("DATAIN" from fabric or "IDATAIN" from IO) + .HIGH_PERFORMANCE_MODE ( "TRUE" ), // "TRUE" for less jitter; "FALSE" for low power + .IDELAY_TYPE ( "VAR_LOAD" ), // mode of operation, see above + .IDELAY_VALUE ( 0 ), // delay value 0-31 (used in "VARIABLE" and "FIXED" mode) + .PIPE_SEL ( "FALSE" ), // "TRUE" activates pipelined operation + .REFCLK_FREQUENCY ( 200.0 ), // used for STA and simulation (190.0 - 310.0 MHz) + .SIGNAL_PATTERN ( "CLOCK" ) // "DATA" or "CLOCK" depending
on function, used in STA + ) i_delay ( + .REGRST ( rst_i ), // input: pipeline register reset (per UG471 only used in VAR_LOAD_PIPE mode, so presumably without effect in VAR_LOAD - confirm) + .C ( clk_i ), // input: control input clock + .DATAIN ( in_i ), // input: signal from FPGA logic to be delayed + .IDATAIN ( 1'b0 ), // input: signal from IO to be delayed + .DATAOUT ( out_o ), // output: delayed from DATAIN or IDATAIN (drives ISERDESE2 or logic, not IO!) + .CE ( 1'b0 ), // input: increment/decrement enable + .CINVCTRL ( 1'b0 ), // input: switch clock polarity during operation (glitches!) + .CNTVALUEIN ( delay_i ), // 5 bit input: delay tap + .CNTVALUEOUT ( ), // 5 bit output: delay tap + .LD ( 1'b1 ), // input: load IDELAY_VALUE param or CNTVALUEIN (depends on IDELAY_TYPE); tied high, so in VAR_LOAD mode delay_i is presumably reloaded every clk_i cycle (confirm against UG471) + .INC ( 1'b0 ), // input: increment/decrement delay tap + .LDPIPEEN ( 1'b0 ) // input: enable the pipeline register to load data from LD + ); + +endmodule diff --git a/test/axi_hyper_tb.sv b/test/axi_hyper_tb.sv index d3d4bb3..aef556f 100644 --- a/test/axi_hyper_tb.sv +++ b/test/axi_hyper_tb.sv @@ -28,12 +28,14 @@ module axi_hyper_tb /// Test time of the DUT parameter time TbTestTime = 4ns ); + import hyperbus_tb_pkg::*; ///////////////////////////// // Axi channel definitions // ///////////////////////////// `include "axi/typedef.svh" `include "axi/assign.svh" + ///////////////////////// // Clock and Reset gen // ///////////////////////// @@ -59,9 +61,11 @@ module axi_hyper_tb logic end_of_sim; - //////////////////////////////// - // Stimuli generator typedefs // - //////////////////////////////// + + /////////////////////// + // AXI Random Master // + /////////////////////// + // AXI master for random data transactions typedef axi_test::axi_rand_master #( .AW ( TbAxiAddrWidthFull ), .DW ( TbAxiDataWidthFull ), @@ -92,20 +96,12 @@ module axi_hyper_tb .TT( TbTestTime ) ) axi_scoreboard_mst_t; - typedef reg_test::reg_driver #( - .AW ( RegBusAW ), - .DW ( RegBusDW ), - .TT ( TbTestTime ) - ) reg_bus_master_t; - - logic s_reg_error; - AXI_BUS_DV #(
.AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), .AXI_ID_WIDTH ( TbAxiIdWidthFull ), .AXI_USER_WIDTH ( TbAxiUserWidthFull ) - ) axi_mst_intf_dv ( + ) axi_rand_intf_dv ( .clk_i ( clk ) ); @@ -118,12 +114,125 @@ module axi_hyper_tb .clk_i ( clk ) ); - `AXI_ASSIGN_MONITOR(score_mst_intf_dv, axi_mst_intf_dv) + AXI_BUS #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_rand_intf (); + + `AXI_ASSIGN_MONITOR(score_mst_intf_dv, axi_rand_intf_dv) + `AXI_ASSIGN(axi_rand_intf, axi_rand_intf_dv) + + + + //////////////////////// + // AXI Control Master // + //////////////////////// + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_ctrl_intf_dv ( + .clk_i ( clk ) + ); + + AXI_BUS #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_ctrl_intf (); + + typedef axi_test::axi_driver #( + .AW ( TbAxiAddrWidthFull ), + .DW ( TbAxiDataWidthFull ), + .IW ( TbAxiIdWidthFull ), + .UW ( TbAxiUserWidthFull ), + .TA ( TbApplTime ), + .TT ( TbTestTime ) + ) axi_ctrl_master_t; + axi_ctrl_master_t axi_ctrl_mst = new( axi_ctrl_intf_dv ); + + `AXI_ASSIGN(axi_ctrl_intf, axi_ctrl_intf_dv) + + logic s_axi_error; // NOTE(review): not driven by any hunk visible in this patch + + + ////////////////////////////// + // AXI Control Master Tasks // + ////////////////////////////// + task automatic axi_write_32( // single-beat write of one 32-bit word through axi_ctrl_mst; reports a non-OKAY B response via $error + input axi_addr_t addr, + input bit [31:0] data + ); + axi_ctrl_master_t::ax_beat_t ax = new(); + axi_ctrl_master_t::w_beat_t w = new(); + axi_ctrl_master_t::b_beat_t b; + + @(posedge clk); + ax.ax_addr = addr; + ax.ax_id = 0; + ax.ax_len = 0; // AXI len encodes beats-1: one beat + ax.ax_size = 2; // AXI size encodes log2(bytes): 4 bytes per beat + ax.ax_burst = axi_pkg::BURST_INCR; + axi_ctrl_mst.send_aw(ax); + w.w_strb = 'h0F; // strobe only the four lowest byte lanes + w.w_data
= data; + w.w_last = 1; + axi_ctrl_mst.send_w(w); + axi_ctrl_mst.recv_b(b); + if (b.b_resp != axi_pkg::RESP_OKAY) + $error("[AXI-CTRL] - Write error response: %d!", b.b_resp); + endtask + + + /////////////////// + // Regbus Master // + /////////////////// + typedef reg_test::reg_driver #( + .AW ( RegBusAW ), + .DW ( RegBusDW ), + .TT ( TbTestTime ) + ) reg_bus_master_t; + + logic s_reg_error; REG_BUS #( .ADDR_WIDTH(RegBusAW), .DATA_WIDTH(RegBusDW) ) reg_bus_mst (.clk_i (clk)); + + + + //////////////////// + // AXI Master MUX // + //////////////////// + + AXI_BUS #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull +1 ), // one ID bit wider than the mux inputs, matching MST_AXI_ID_WIDTH of the mux + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_dut_intf (); + + axi_mux_intf #( + .SLV_AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .MST_AXI_ID_WIDTH ( TbAxiIdWidthFull +1 ), + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ), + .NO_SLV_PORTS ( 2 ) + ) i_axi_mst_mux ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .test_i ( 1'b0 ), + .slv ( { axi_ctrl_intf, axi_rand_intf } ), + .mst ( axi_dut_intf ) + ); + //////////////////// // Address Ranges // @@ -136,18 +245,21 @@ module axi_hyper_tb initial begin : proc_sim_crtl - automatic axi_scoreboard_mst_t mst_scoreboard = new( score_mst_intf_dv ); - automatic axi_rand_master_t axi_master = new( axi_mst_intf_dv ); - automatic reg_bus_master_t reg_master = new( reg_bus_mst ); + automatic axi_scoreboard_mst_t mst_scoreboard = new( score_mst_intf_dv ); + automatic axi_rand_master_t axi_rand_mst = new( axi_rand_intf_dv ); + automatic reg_bus_master_t reg_master = new( reg_bus_mst ); + + automatic s27ks_cfg0_reg_t s27ks_cfg0 = hyperbus_tb_pkg::s27ks_cfg0_default; // Reset the AXI drivers and scoreboards end_of_sim = 1'b0; mst_scoreboard.reset(); - axi_master.reset(); + axi_rand_mst.reset(); + axi_ctrl_mst.reset_master(); reg_master.reset_master(); // Set some mem regions for rand
axi master - axi_master.add_memory_region(32'h8000_0000, 32'h8000_0000 + ( TbDramDataWidth * TbDramLenWidth ), axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE); + axi_rand_mst.add_memory_region(32'h8000_0000, 32'h8000_0000 + ( TbDramDataWidth * TbDramLenWidth ), axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE); mst_scoreboard.enable_all_checks(); @@ -156,11 +268,24 @@ module axi_hyper_tb #600350ns; + // switch memory address space to register space + reg_master.send_write(32'h7<<2, 1'b1, '1, s_reg_error); + if (s_reg_error != 1'b0) $error("unexpected error"); + + // enable variable latency so we can test RWDS sampling + s27ks_cfg0.fixed_latency_enable = 1'b0; + $display("[TB] Writing S27KS CFG0 to disable fixed latency"); + axi_write_32(32'h8000_0000 + S27KS_CFG0_REG_OFFSET, (s27ks_cfg0 | s27ks_cfg0 << 16)); + + // switch back to memory address space; check the regbus error flag this write just updated + reg_master.send_write(32'h7<<2, 1'b0, '1, s_reg_error); + if (s_reg_error != 1'b0) $error("unexpected error"); + $display("==========================="); $display("= Random AXI transactions ="); $display("==========================="); - axi_master.run(TbNumReads, TbNumWrites); + axi_rand_mst.run(TbNumReads, TbNumWrites); $display("==========================="); $display("= Test finished ="); @@ -180,13 +305,13 @@ module axi_hyper_tb reg_master.send_write(32'h24,1'b0,'1,s_reg_error); if (s_reg_error != 1'b0) $error("unexpected error"); - axi_master.reset(); + axi_rand_mst.reset(); $display("==========================="); $display("= Random AXI transactions ="); $display("==========================="); - axi_master.run(TbNumReads, TbNumWrites); + axi_rand_mst.run(TbNumReads, TbNumWrites); $display("==========================="); $display("= Test finished ="); @@ -201,13 +326,13 @@ module axi_hyper_tb reg_master.send_write(32'h24,1'b1,'1,s_reg_error); if (s_reg_error != 1'b0) $error("unexpected error"); - axi_master.reset(); + axi_rand_mst.reset(); $display("==========================="); $display("= Random AXI transactions ="); $display("==========================="); -
axi_master.run(TbNumReads, TbNumWrites); + axi_rand_mst.run(TbNumReads, TbNumWrites); $display("==========================="); $display("= Test finished ="); @@ -226,7 +351,7 @@ module axi_hyper_tb .TbTestTime ( TbTestTime ), .AxiDataWidth ( TbAxiDataWidthFull ), .AxiAddrWidth ( TbAxiAddrWidthFull ), - .AxiIdWidth ( TbAxiIdWidthFull ), + .AxiIdWidth ( TbAxiIdWidthFull+1 ), .AxiUserWidth ( TbAxiUserWidthFull ), .RegAw ( RegBusAW ), @@ -238,11 +363,11 @@ module axi_hyper_tb .axi_rule_t ( rule_t ) ) i_dut_if ( // clk and rst signal - .clk_i ( clk ), - .rst_ni ( rst_n ), - .end_sim_i ( end_of_sim ), - .axi_slv_if ( axi_mst_intf_dv ), - .reg_slv_if ( reg_bus_mst ) + .clk_i ( clk ), + .rst_ni ( rst_n ), + .end_sim_i ( end_of_sim ), + .axi_slv_if ( axi_dut_intf ), + .reg_slv_if ( reg_bus_mst ) ); endmodule diff --git a/test/dut_if.sv b/test/dut_if.sv index da49ec6..7a06e5e 100644 --- a/test/dut_if.sv +++ b/test/dut_if.sv @@ -31,8 +31,8 @@ module dut_if input logic rst_ni, input logic end_sim_i, - AXI_BUS_DV.Slave axi_slv_if, - REG_BUS.in reg_slv_if + AXI_BUS.Slave axi_slv_if, + REG_BUS.in reg_slv_if ); localparam int unsigned DRAM_DB_WIDTH = 16; @@ -124,25 +124,25 @@ module dut_if // DUT hyperbus #( - .NumChips ( NumChips ), - .NumPhys ( NumPhys ), - .AxiAddrWidth ( AxiAddrWidth ), - .AxiDataWidth ( AxiDataWidth ), - .AxiIdWidth ( AxiIdWidth ), - .AxiUserWidth ( AxiUserWidth ), - .axi_req_t ( axi_req_t ), - .axi_rsp_t ( axi_resp_t ), - .axi_aw_chan_t ( axi_aw_chan_t ), - .axi_w_chan_t ( axi_w_chan_t ), - .axi_b_chan_t ( axi_b_chan_t ), - .axi_ar_chan_t ( axi_ar_chan_t ), - .axi_r_chan_t ( axi_r_chan_t ), - .RegAddrWidth ( RegAw ), - .RegDataWidth ( RegDw ), - .reg_req_t ( reg_req_t ), - .reg_rsp_t ( reg_rsp_t ), - .IsClockODelayed( 0 ), - .axi_rule_t ( axi_rule_t ) + .NumChips ( NumChips ), + .NumPhys ( NumPhys ), + .AxiAddrWidth ( AxiAddrWidth ), + .AxiDataWidth ( AxiDataWidth ), + .AxiIdWidth ( AxiIdWidth ), + .AxiUserWidth ( AxiUserWidth ), + .axi_req_t ( axi_req_t ), 
+ .axi_rsp_t ( axi_resp_t ), + .axi_aw_chan_t ( axi_aw_chan_t ), + .axi_w_chan_t ( axi_w_chan_t ), + .axi_b_chan_t ( axi_b_chan_t ), + .axi_ar_chan_t ( axi_ar_chan_t ), + .axi_r_chan_t ( axi_r_chan_t ), + .RegAddrWidth ( RegAw ), + .RegDataWidth ( RegDw ), + .reg_req_t ( reg_req_t ), + .reg_rsp_t ( reg_rsp_t ), + .UsePhyClkDivider ( 1 ), + .axi_rule_t ( axi_rule_t ) ) i_dut ( .clk_phy_i ( clk_i ), .rst_phy_ni ( rst_ni ), diff --git a/test/hyperbus_tb_pkg.sv b/test/hyperbus_tb_pkg.sv new file mode 100644 index 0000000..a1bc827 --- /dev/null +++ b/test/hyperbus_tb_pkg.sv @@ -0,0 +1,32 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +package hyperbus_tb_pkg; + + parameter int unsigned S27KS_ID0_REG_OFFSET = 32'h0000_0000; + parameter int unsigned S27KS_ID1_REG_OFFSET = 32'h0000_0002; + parameter int unsigned S27KS_CFG0_REG_OFFSET = 32'h0000_2000; + parameter int unsigned S27KS_CFG1_REG_OFFSET = 32'h0000_2002; + + typedef struct packed { // 16-bit register image; first member is the most significant bit + bit deep_power_done; // [15] NOTE(review): presumably the "deep power down" control bit, name looks like a typo - confirm against the S27KS0641 datasheet + bit [2:0] drive_strength; // [14:12] + bit [3:0] reserved; // [11:8] + bit [3:0] initial_latency; // [7:4] + bit fixed_latency_enable; // [3] + bit hybrid_burst_enable; // [2] + bit [1:0] burst_length; // [1:0] + } s27ks_cfg0_reg_t; + + parameter s27ks_cfg0_reg_t s27ks_cfg0_default = s27ks_cfg0_reg_t'{ // packs to 16'h8F1F + deep_power_done: 1'h1, + drive_strength: 3'h0, + reserved: 4'hF, + initial_latency: 4'h1, + fixed_latency_enable: 1'b1, + hybrid_burst_enable: 1'b1, + burst_length: 2'h3 + }; + +endpackage