From 8b2f51a790f721a91ee1d03cee567c584b32acf1 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Fri, 29 Nov 2024 22:57:21 +0100 Subject: [PATCH 01/16] Set Hyperram chips conf register in tb --- Bender.yml | 1 + test/axi_hyper_tb.sv | 183 +++++++++++++++++++++++++++++++++------- test/dut_if.sv | 4 +- test/hyperbus_tb_pkg.sv | 32 +++++++ 4 files changed, 189 insertions(+), 31 deletions(-) create mode 100644 test/hyperbus_tb_pkg.sv diff --git a/Bender.yml b/Bender.yml index 905e54d..e5a6717 100644 --- a/Bender.yml +++ b/Bender.yml @@ -47,5 +47,6 @@ sources: - test/fixture_hyperbus.sv - test/hyperbus_tb.sv - test/dut_if.sv + - test/hyperbus_tb_pkg.sv - test/axi_hyper_tb.sv - src/hyperbus.sv diff --git a/test/axi_hyper_tb.sv b/test/axi_hyper_tb.sv index d3d4bb3..aef556f 100644 --- a/test/axi_hyper_tb.sv +++ b/test/axi_hyper_tb.sv @@ -28,12 +28,14 @@ module axi_hyper_tb /// Test time of the DUT parameter time TbTestTime = 4ns ); + import hyperbus_tb_pkg::*; ///////////////////////////// // Axi channel definitions // ///////////////////////////// `include "axi/typedef.svh" `include "axi/assign.svh" + ///////////////////////// // Clock and Reset gen // ///////////////////////// @@ -59,9 +61,11 @@ module axi_hyper_tb logic end_of_sim; - //////////////////////////////// - // Stimuli generator typedefs // - //////////////////////////////// + + /////////////////////// + // AXI Random Master // + /////////////////////// + // AXI master for random data transactions typedef axi_test::axi_rand_master #( .AW ( TbAxiAddrWidthFull ), .DW ( TbAxiDataWidthFull ), @@ -92,20 +96,12 @@ module axi_hyper_tb .TT( TbTestTime ) ) axi_scoreboard_mst_t; - typedef reg_test::reg_driver #( - .AW ( RegBusAW ), - .DW ( RegBusDW ), - .TT ( TbTestTime ) - ) reg_bus_master_t; - - logic s_reg_error; - AXI_BUS_DV #( .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), .AXI_ID_WIDTH ( TbAxiIdWidthFull ), .AXI_USER_WIDTH ( TbAxiUserWidthFull ) - ) axi_mst_intf_dv ( + ) 
axi_rand_intf_dv ( .clk_i ( clk ) ); @@ -118,12 +114,125 @@ module axi_hyper_tb .clk_i ( clk ) ); - `AXI_ASSIGN_MONITOR(score_mst_intf_dv, axi_mst_intf_dv) + AXI_BUS #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_rand_intf (); + + `AXI_ASSIGN_MONITOR(score_mst_intf_dv, axi_rand_intf_dv) + `AXI_ASSIGN(axi_rand_intf, axi_rand_intf_dv) + + + + //////////////////////// + // AXI Control Master // + //////////////////////// + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_ctrl_intf_dv ( + .clk_i ( clk ) + ); + + AXI_BUS #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_ctrl_intf (); + + typedef axi_test::axi_driver #( + .AW ( TbAxiAddrWidthFull ), + .DW ( TbAxiDataWidthFull ), + .IW ( TbAxiIdWidthFull ), + .UW ( TbAxiUserWidthFull ), + .TA ( TbApplTime ), + .TT ( TbTestTime ) + ) axi_ctrl_master_t; + axi_ctrl_master_t axi_ctrl_mst = new( axi_ctrl_intf_dv ); + + `AXI_ASSIGN(axi_ctrl_intf, axi_ctrl_intf_dv) + + logic s_axi_error; + + + ////////////////////////////// + // AXI Control Master Tasks // + ////////////////////////////// + task automatic axi_write_32( + input axi_addr_t addr, + input bit [31:0] data + ); + axi_ctrl_master_t::ax_beat_t ax = new(); + axi_ctrl_master_t::w_beat_t w = new(); + axi_ctrl_master_t::b_beat_t b; + + @(posedge clk); + ax.ax_addr = addr; + ax.ax_id = 0; + ax.ax_len = 0; + ax.ax_size = 2; + ax.ax_burst = axi_pkg::BURST_INCR; + axi_ctrl_mst.send_aw(ax); + w.w_strb = 'h0F; + w.w_data = data; + w.w_last = 1; + axi_ctrl_mst.send_w(w); + axi_ctrl_mst.recv_b(b); + if (b.b_resp != axi_pkg::RESP_OKAY) + $error("[AXI-CTRL] - Write error response: %d!", b.b_resp); + 
endtask + + + /////////////////// + // Regbus Master // + /////////////////// + typedef reg_test::reg_driver #( + .AW ( RegBusAW ), + .DW ( RegBusDW ), + .TT ( TbTestTime ) + ) reg_bus_master_t; + + logic s_reg_error; REG_BUS #( .ADDR_WIDTH(RegBusAW), .DATA_WIDTH(RegBusDW) ) reg_bus_mst (.clk_i (clk)); + + + + //////////////////// + // AXI Master MUX // + //////////////////// + + AXI_BUS #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull +1 ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_dut_intf (); + + axi_mux_intf #( + .SLV_AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .MST_AXI_ID_WIDTH ( TbAxiIdWidthFull +1 ), + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ), + .NO_SLV_PORTS ( 2 ) + ) i_axi_mst_mux ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .test_i ( 1'b0 ), + .slv ( { axi_ctrl_intf, axi_rand_intf } ), + .mst ( axi_dut_intf ) + ); + //////////////////// // Address Ranges // @@ -136,18 +245,21 @@ module axi_hyper_tb initial begin : proc_sim_crtl - automatic axi_scoreboard_mst_t mst_scoreboard = new( score_mst_intf_dv ); - automatic axi_rand_master_t axi_master = new( axi_mst_intf_dv ); - automatic reg_bus_master_t reg_master = new( reg_bus_mst ); + automatic axi_scoreboard_mst_t mst_scoreboard = new( score_mst_intf_dv ); + automatic axi_rand_master_t axi_rand_mst = new( axi_rand_intf_dv ); + automatic reg_bus_master_t reg_master = new( reg_bus_mst ); + + automatic s27ks_cfg0_reg_t s27ks_cfg0 = hyperbus_tb_pkg::s27ks_cfg0_default; // Reset the AXI drivers and scoreboards end_of_sim = 1'b0; mst_scoreboard.reset(); - axi_master.reset(); + axi_rand_mst.reset(); + axi_ctrl_mst.reset_master(); reg_master.reset_master(); // Set some mem regions for rand axi master - axi_master.add_memory_region(32'h8000_0000, 32'h8000_0000 + ( TbDramDataWidth * TbDramLenWidth ), axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE); + 
axi_rand_mst.add_memory_region(32'h8000_0000, 32'h8000_0000 + ( TbDramDataWidth * TbDramLenWidth ), axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE); mst_scoreboard.enable_all_checks(); @@ -156,11 +268,24 @@ module axi_hyper_tb #600350ns; + // switch memory address space to register space + reg_master.send_write(32'h7<<2, 1'b1, '1, s_reg_error); + if (s_reg_error != 1'b0) $error("unexpected error"); + + // enable variable latency so we can test RWDS sampling + s27ks_cfg0.fixed_latency_enable = 1'b0; + $display("t3est"); + axi_write_32(32'h8000_0000 + S27KS_CFG0_REG_OFFSET, (s27ks_cfg0 | s27ks_cfg0 << 16)); + + // switch back to memory address space + reg_master.send_write(32'h7<<2, 1'b0, '1, s_reg_error); + if (s_axi_error != 1'b0) $error("unexpected error"); + $display("==========================="); $display("= Random AXI transactions ="); $display("==========================="); - axi_master.run(TbNumReads, TbNumWrites); + axi_rand_mst.run(TbNumReads, TbNumWrites); $display("==========================="); $display("= Test finished ="); @@ -180,13 +305,13 @@ module axi_hyper_tb reg_master.send_write(32'h24,1'b0,'1,s_reg_error); if (s_reg_error != 1'b0) $error("unexpected error"); - axi_master.reset(); + axi_rand_mst.reset(); $display("==========================="); $display("= Random AXI transactions ="); $display("==========================="); - axi_master.run(TbNumReads, TbNumWrites); + axi_rand_mst.run(TbNumReads, TbNumWrites); $display("==========================="); $display("= Test finished ="); @@ -201,13 +326,13 @@ module axi_hyper_tb reg_master.send_write(32'h24,1'b1,'1,s_reg_error); if (s_reg_error != 1'b0) $error("unexpected error"); - axi_master.reset(); + axi_rand_mst.reset(); $display("==========================="); $display("= Random AXI transactions ="); $display("==========================="); - axi_master.run(TbNumReads, TbNumWrites); + axi_rand_mst.run(TbNumReads, TbNumWrites); $display("==========================="); $display("= Test finished ="); @@ 
-226,7 +351,7 @@ module axi_hyper_tb .TbTestTime ( TbTestTime ), .AxiDataWidth ( TbAxiDataWidthFull ), .AxiAddrWidth ( TbAxiAddrWidthFull ), - .AxiIdWidth ( TbAxiIdWidthFull ), + .AxiIdWidth ( TbAxiIdWidthFull+1 ), .AxiUserWidth ( TbAxiUserWidthFull ), .RegAw ( RegBusAW ), @@ -238,11 +363,11 @@ module axi_hyper_tb .axi_rule_t ( rule_t ) ) i_dut_if ( // clk and rst signal - .clk_i ( clk ), - .rst_ni ( rst_n ), - .end_sim_i ( end_of_sim ), - .axi_slv_if ( axi_mst_intf_dv ), - .reg_slv_if ( reg_bus_mst ) + .clk_i ( clk ), + .rst_ni ( rst_n ), + .end_sim_i ( end_of_sim ), + .axi_slv_if ( axi_dut_intf ), + .reg_slv_if ( reg_bus_mst ) ); endmodule diff --git a/test/dut_if.sv b/test/dut_if.sv index da49ec6..af257eb 100644 --- a/test/dut_if.sv +++ b/test/dut_if.sv @@ -31,8 +31,8 @@ module dut_if input logic rst_ni, input logic end_sim_i, - AXI_BUS_DV.Slave axi_slv_if, - REG_BUS.in reg_slv_if + AXI_BUS.Slave axi_slv_if, + REG_BUS.in reg_slv_if ); localparam int unsigned DRAM_DB_WIDTH = 16; diff --git a/test/hyperbus_tb_pkg.sv b/test/hyperbus_tb_pkg.sv new file mode 100644 index 0000000..a1bc827 --- /dev/null +++ b/test/hyperbus_tb_pkg.sv @@ -0,0 +1,32 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 + +package hyperbus_tb_pkg; + + parameter int unsigned S27KS_ID0_REG_OFFSET = 32'h0000_0000; + parameter int unsigned S27KS_ID1_REG_OFFSET = 32'h0000_0002; + parameter int unsigned S27KS_CFG0_REG_OFFSET = 32'h0000_2000; + parameter int unsigned S27KS_CFG1_REG_OFFSET = 32'h0000_2002; + + typedef struct packed { + bit deep_power_done; + bit [2:0] drive_strength; + bit [3:0] reserved; + bit [3:0] initial_latency; + bit fixed_latency_enable; + bit hybrid_burst_enable; + bit [1:0] burst_length; + } s27ks_cfg0_reg_t; + + parameter s27ks_cfg0_reg_t s27ks_cfg0_default = s27ks_cfg0_reg_t'{ + deep_power_done: 1'h1, + drive_strength: 3'h0, + reserved: 4'hF, + initial_latency: 4'h1, + fixed_latency_enable: 1'b1, + hybrid_burst_enable: 1'b1, + burst_length: 2'h3 + }; + +endpackage From 3bcf35c8a8b802cd75bb2affc22a857bc10057cf Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 15:55:13 +0100 Subject: [PATCH 02/16] Remove unused parameters --- src/hyperbus.sv | 1 - src/hyperbus_phy_if.sv | 106 ++++++++++++++++++++--------------------- 2 files changed, 52 insertions(+), 55 deletions(-) diff --git a/src/hyperbus.sv b/src/hyperbus.sv index cf7212d..ed7bdcf 100644 --- a/src/hyperbus.sv +++ b/src/hyperbus.sv @@ -34,7 +34,6 @@ module hyperbus #( parameter logic [RegDataWidth-1:0] RstChipSpace = 'h1_0000, // 64 KiB: Current maximum HyperBus device size parameter hyperbus_pkg::hyper_cfg_t RstCfg = hyperbus_pkg::gen_RstCfg(NumPhys,MinFreqMHz), parameter int unsigned PhyStartupCycles = 300 * 200, /* us*MHz */ // Conservative maximum frequency estimate - parameter int unsigned AxiLogDepth = 3, parameter int unsigned SyncStages = 2 ) ( input logic clk_phy_i, diff --git a/src/hyperbus_phy_if.sv b/src/hyperbus_phy_if.sv index 3203b63..28bff22 100644 --- a/src/hyperbus_phy_if.sv +++ b/src/hyperbus_phy_if.sv @@ -8,10 +8,8 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( parameter int unsigned IsClockODelayed = 1, parameter int unsigned 
NumChips = 2, parameter int unsigned NumPhys = 2, - parameter int unsigned TimerWidth = 16, - parameter int unsigned RxFifoLogDepth = 3, parameter int unsigned StartupCycles = 60000, /*MHz*/ // Conservative maximum frequency estimate - parameter int unsigned SyncStages = 2, + parameter int unsigned SyncStages = 2, parameter type hyper_tx_t = logic, parameter type hyper_rx_t = logic )( @@ -61,11 +59,11 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( logic [NumPhys-1:0][1:0] fifo_axi_usage; - logic tx_both_ready, ts_both_ready; - logic rx_both_valid, b_both_valid; + logic tx_both_ready, ts_both_ready; + logic rx_both_valid, b_both_valid; - logic [NumPhys-1:0] phy_tx_ready; - logic phy_tx_valid; + logic [NumPhys-1:0] phy_tx_ready; + logic phy_tx_valid; logic [NumPhys-1:0] phy_trans_ready; logic [NumPhys-1:0] phy_trans_valid; @@ -77,7 +75,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( genvar i; generate - if (NumPhys==2) begin : phy_wrap + if (NumPhys==2) begin : phy_wrap logic [NumPhys-1:0] phy_enable; logic [NumPhys-1:0] phy_busy; @@ -156,41 +154,41 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( .busy_o ( phy_busy[i] ), - .rx_data_o ( phy_fifo_rx[i].data ), - .rx_last_o ( phy_fifo_rx[i].last ), - .rx_error_o ( phy_fifo_rx[i].error ), - .rx_valid_o ( phy_fifo_valid[i] ), - .rx_ready_i ( phy_fifo_ready[i] ), + .rx_data_o ( phy_fifo_rx[i].data ), + .rx_last_o ( phy_fifo_rx[i].last ), + .rx_error_o ( phy_fifo_rx[i].error ), + .rx_valid_o ( phy_fifo_valid[i] ), + .rx_ready_i ( phy_fifo_ready[i] ), - .tx_data_i ( tx_i.data[16*i +:16] ), - .tx_strb_i ( tx_i.strb[2*i +:2] ), - .tx_last_i ( tx_i.last ), - .tx_valid_i ( phy_tx_valid ), - .tx_ready_o ( phy_tx_ready[i] ), + .tx_data_i ( tx_i.data[16*i +:16] ), + .tx_strb_i ( tx_i.strb[2*i +:2] ), + .tx_last_i ( tx_i.last ), + .tx_valid_i ( phy_tx_valid ), + .tx_ready_o ( phy_tx_ready[i] ), - .b_error_o ( phy_b_error[i] ), - .b_valid_o ( phy_b_valid[i] ), - .b_ready_i ( phy_b_ready ), + .b_error_o ( 
phy_b_error[i] ), + .b_valid_o ( phy_b_valid[i] ), + .b_ready_i ( phy_b_ready ), .trans_i ( trans_i ), .trans_cs_i ( trans_cs_i ), .trans_valid_i ( phy_trans_valid[i] ), .trans_ready_o ( phy_trans_ready[i] ), - .hyper_cs_no ( hyper_cs_no[i] ), - .hyper_ck_o ( hyper_ck_o[i] ), - .hyper_ck_no ( hyper_ck_no[i] ), - .hyper_rwds_o ( hyper_rwds_o[i] ), - .hyper_rwds_i ( hyper_rwds_i[i] ), - .hyper_rwds_oe_o( hyper_rwds_oe_o[i] ), - .hyper_dq_i ( hyper_dq_i[i] ), - .hyper_dq_o ( hyper_dq_o[i] ), - .hyper_dq_oe_o ( hyper_dq_oe_o[i] ), - .hyper_reset_no ( hyper_reset_no[i] ) - ); + .hyper_cs_no ( hyper_cs_no[i] ), + .hyper_ck_o ( hyper_ck_o[i] ), + .hyper_ck_no ( hyper_ck_no[i] ), + .hyper_rwds_o ( hyper_rwds_o[i] ), + .hyper_rwds_i ( hyper_rwds_i[i] ), + .hyper_rwds_oe_o( hyper_rwds_oe_o[i] ), + .hyper_dq_i ( hyper_dq_i[i] ), + .hyper_dq_o ( hyper_dq_o[i] ), + .hyper_dq_oe_o ( hyper_dq_oe_o[i] ), + .hyper_reset_no ( hyper_reset_no[i] ) + ); - end // for ( i=0; i Date: Tue, 28 Oct 2025 15:57:13 +0100 Subject: [PATCH 03/16] Fix fifo depth for clk_sys=clk_phy Prevents performance degradations (FIFO bubbles) in situation where the system and PHY are running on the same clock. --- src/hyperbus.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperbus.sv b/src/hyperbus.sv index ed7bdcf..7b4a005 100644 --- a/src/hyperbus.sv +++ b/src/hyperbus.sv @@ -28,8 +28,8 @@ module hyperbus #( parameter type reg_rsp_t = logic, parameter type axi_rule_t = logic, // The below have sensible defaults, but should be set on integration! 
- parameter int unsigned RxFifoLogDepth = 2, - parameter int unsigned TxFifoLogDepth = 2, + parameter int unsigned RxFifoLogDepth = 3, + parameter int unsigned TxFifoLogDepth = 3, parameter logic [RegDataWidth-1:0] RstChipBase = 'h0, // Base address for all chips parameter logic [RegDataWidth-1:0] RstChipSpace = 'h1_0000, // 64 KiB: Current maximum HyperBus device size parameter hyperbus_pkg::hyper_cfg_t RstCfg = hyperbus_pkg::gen_RstCfg(NumPhys,MinFreqMHz), From 04462ee00c05770e1cc749467cb599888e656ae6 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 16:13:21 +0100 Subject: [PATCH 04/16] Add vsim outputs and model to gitignore --- .gitignore | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bdf1e1b..a63d836 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ .bender scripts/compile.tcl -models/s27ks0641 \ No newline at end of file +axi_log/ +work/ +transcript +modelsim.ini +vsim.wlf +models/s27ks0641 From eb5edddf63d3c274235bb9242b739a6b792dfe5d Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:08:49 +0100 Subject: [PATCH 05/16] Fix fifo depth for clk_sys=clk_phy Prevents performance degradations (FIFO bubbles) in situation where the system and PHY are running on the same clock. 
--- src/hyperbus_axi.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperbus_axi.sv b/src/hyperbus_axi.sv index ca78ed2..a54012b 100644 --- a/src/hyperbus_axi.sv +++ b/src/hyperbus_axi.sv @@ -170,7 +170,7 @@ module hyperbus_axi #( // ============================ axi_fifo #( - .Depth ( 4 ), + .Depth ( 8 ), .FallThrough ( 1'b0 ), .aw_chan_t ( axi_fifo_aw_chan_t ), .w_chan_t ( axi_fifo_w_chan_t ), @@ -420,7 +420,7 @@ module hyperbus_axi #( stream_fifo #( .FALL_THROUGH ( 1'b0 ), .T ( axi_w_chan_t ), - .DEPTH ( 8 ) + .DEPTH ( 16 ) ) wchan_stream_fifo ( .clk_i, .rst_ni, From 3ee09acd93ddaee2ea1f98c687572d5ef960bea3 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:08:49 +0100 Subject: [PATCH 06/16] Remove unused parameters --- src/hyperbus_phy_if.sv | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/hyperbus_phy_if.sv b/src/hyperbus_phy_if.sv index 28bff22..55b395f 100644 --- a/src/hyperbus_phy_if.sv +++ b/src/hyperbus_phy_if.sv @@ -50,12 +50,12 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( output logic [NumPhys-1:0] hyper_reset_no ); - phy_rx_t [NumPhys-1:0] phy_fifo_rx; - phy_rx_t [NumPhys-1:0] fifo_axi_rx; - logic [NumPhys-1:0] phy_fifo_valid; - logic [NumPhys-1:0] phy_fifo_ready; - logic [NumPhys-1:0] fifo_axi_valid; - logic fifo_axi_ready; + phy_rx_t [NumPhys-1:0] phy_fifo_rx; + phy_rx_t [NumPhys-1:0] fifo_axi_rx; + logic [NumPhys-1:0] phy_fifo_valid; + logic [NumPhys-1:0] phy_fifo_ready; + logic [NumPhys-1:0] fifo_axi_valid; + logic fifo_axi_ready; logic [NumPhys-1:0][1:0] fifo_axi_usage; @@ -150,7 +150,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), - .cfg_i ( cfg_i ), + .cfg_i ( cfg_i ), .busy_o ( phy_busy[i] ), @@ -202,7 +202,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), - .cfg_i ( cfg_i ), + .cfg_i ( cfg_i ), .busy_o ( ), From 
d6ce1df4511f5a4991e9b7a8f644365049228eeb Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:08:49 +0100 Subject: [PATCH 07/16] Add configurable CS to clock-start delay According to spec: t_DSV (data strobe valid) which is the time from CS# going low to the first hyperbus clock can be at most 2 clock periods long (12ns@166MHz). This shrinks the RWDS valid window down to one period centered on CA4 (5th data transaction). Meaning it is valid around the 3rd rising edge of CK. Problem: With additional routing delay this may cause the RWDS sample register (clocked by clk_i) to miss the stable period of RWDS. Solution: Delaying the clock is allowed and gives RWDS more time to arrive and creates a larger stable window. It is possible to set this to zero to increase throughput. --- src/hyperbus_cfg_regs.sv | 6 ++++-- src/hyperbus_phy.sv | 29 +++++++++++++++++++++++------ src/hyperbus_pkg.sv | 7 +++++-- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/hyperbus_cfg_regs.sv b/src/hyperbus_cfg_regs.sv index d438007..70ac78e 100644 --- a/src/hyperbus_cfg_regs.sv +++ b/src/hyperbus_cfg_regs.sv @@ -30,10 +30,10 @@ module hyperbus_cfg_regs #( `include "common_cells/registers.svh" // Internal Parameters - localparam int unsigned NumBaseRegs = 11; + localparam int unsigned NumBaseRegs = 12; localparam int unsigned NumRegs = 2*NumChips + NumBaseRegs; localparam int unsigned RegsBits = cf_math_pkg::idx_width(NumRegs); - localparam int unsigned RegStrbWidth = RegDataWidth/8; // TODO ASSERT: Must be power of two >= 16!! + localparam int unsigned RegStrbWidth = RegDataWidth/8; // Data and index types typedef logic [RegsBits-1:0] reg_idx_t; @@ -59,6 +59,7 @@ module hyperbus_cfg_regs #( if (sel_reg_mapped) begin rfield = { crange_q, + reg_data_t'(cfg_q.csn_to_ck_cycles), reg_data_t'(cfg_q.t_csh_cycles), reg_data_t'(cfg_q.which_phy), reg_data_t'(cfg_q.phys_in_use), @@ -99,6 +100,7 @@ module hyperbus_cfg_regs #( 'h8: cfg_d.phys_in_use = (NumPhys==1) ? 
0 : ( (~wmask & cfg_q.phys_in_use ) | (wmask & reg_req_i.wdata) ); 'h9: cfg_d.which_phy = (NumPhys==1) ? 0 : ( (~wmask & cfg_q.which_phy ) | (wmask & reg_req_i.wdata) ); 'ha: cfg_d.t_csh_cycles = (~wmask & cfg_q.t_csh_cycles ) | (wmask & reg_req_i.wdata); + 'hb: cfg_d.csn_to_ck_cycles = (~wmask & cfg_q.csn_to_ck_cycles ) | (wmask & reg_req_i.wdata); default: begin {sel_chip, chip_reg} = sel_reg - NumBaseRegs; crange_d[sel_chip][chip_reg] = (~wmask & crange_q[sel_chip][chip_reg]) | (wmask & reg_req_i.wdata); diff --git a/src/hyperbus_phy.sv b/src/hyperbus_phy.sv index 78ed27d..e9229fe 100644 --- a/src/hyperbus_phy.sv +++ b/src/hyperbus_phy.sv @@ -269,15 +269,32 @@ module hyperbus_phy import hyperbus_pkg::*; #( if (trans_valid_i & ~b_pending_q & r_outstand_q == '0) begin tf_d = trans_i; cs_d = trans_cs_i; - // Send 3 CA words (t_CSS respected through clock delay) - timer_d = 2; - state_d = SendCA; - // Enable output driver (needs to be enabled one cycle - // earlier since tri-state enables of IO pads are quite - // slow compared to the data pins) + + if(cfg_i.csn_to_ck_cycles != 0) begin + // asser CS but delay hyper_ck to allow more time + // for memory to drive RWDS (to satisfy t_DSV) + state_d = DelayCK; + timer_d = cfg_i.csn_to_ck_cycles -1; + end else begin + // max throughput when memory RWDS signal arrives early + state_d = SendCA; + // Send 3 CA words (t_CSS respected through clock delay) + timer_d = 2; + end + + // Enable output driver (needs to be enabled at least + // one cycle earlier since tri-state enables of IO pads + // are quite slow compared to the data pins) trx_tx_data_oe = 1'b1; end end + DelayCK: begin + trx_clk_ena = 1'b0; + if (ctl_timer_zero) begin + timer_d = 2; // Send 3 CA words + state_d = SendCA; + end + end SendCA: begin // Dataflow handled outside FSM trx_clk_ena = 1'b1; diff --git a/src/hyperbus_pkg.sv b/src/hyperbus_pkg.sv index 2ea4f47..31e0482 100644 --- a/src/hyperbus_pkg.sv +++ b/src/hyperbus_pkg.sv @@ -20,7 +20,8 @@ package 
hyperbus_pkg; logic address_space; logic phys_in_use; logic which_phy; - logic [3:0] t_csh_cycles; //add an configurable Tcsh for high freq operation(200MHz Hyperram) + logic [3:0] t_csh_cycles; // add an configurable Tcsh for high freq operation(200MHz Hyperram) + logic [3:0] csn_to_ck_cycles; // delay hyper_ck after CS is asserted (more time for t_DSV) } hyper_cfg_t; typedef struct packed { @@ -40,6 +41,7 @@ package hyperbus_pkg; typedef enum logic[3:0] { Startup, Idle, + DelayCK, SendCA, WaitLatAccess, Read, @@ -74,7 +76,8 @@ package hyperbus_pkg; address_space: 'b0, phys_in_use: NumPhys-1, which_phy: NumPhys-1, - t_csh_cycles: 'h1 + t_csh_cycles: 'h1, + csn_to_ck_cycles: 'h2 }; return cfg; From 239d0440a4a2c8948925b369f5d250b029debff4 Mon Sep 17 00:00:00 2001 From: phsauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 08/16] Enforce optimal worst-case RWDS sampling For the worst case RWDS timing (t_DSV max, t_CSS min and t_CKDS min) the window of validity for RWDS is around one clock period centered around the 3rd rising edge of CK. This ensures we sample exactly then. Other sampling may lead to improper results (from sampling high Z) and increases the risk of metastability. For long chip-to-chip delays (or slow pads) it may still be necessary to increase the CS falling edge to first CK edge time. 
--- src/hyperbus_trx.sv | 50 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index b442b10..53de430 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -65,6 +65,11 @@ module hyperbus_trx #( logic rx_rwds_fifo_valid; logic rx_rwds_fifo_ready; + // used to time the sampling of RWDS to determine additional latency + logic [2:0] ck_cnt_d, ck_cnt_q; // TODO: check in sim if this can be one less + logic rwds_sample_ena; + logic rwds_sample_clk; + // Feed through async reset assign hyper_reset_no = rst_ni; @@ -127,16 +132,49 @@ module hyperbus_trx #( end end - // Sample RWDS on demand for extra latency determination - always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rwds_sample - if (~rst_ni) rwds_sample_o <= '0; - else if (rwds_sample_ena_i) rwds_sample_o <= hyper_rwds_i; - end - // ======== // RX // ======== + // The following guarantees a proper worst-case sampling of RWDS. + // RWDS may only be valid (and stable) for a single period around + // the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). + // We create a clock gate that open just for this window from falling + // to falling edge of hyper_ck around the 3rd rising edge. + // Then and only then will the sample be taken. + + // Constraints: + // ena_i is clocked from clk_i, ck_cnt with ~clk_90_i and + // rwds_sample_o with clk_90_i. 
+ // The paths are very short and as long the clk to clk_90 + // constraints are proper (clk_90 being a derived shifted clock) + // this should not cause any problems + always_comb begin : gen_ck_counter + ck_cnt_d = clk_cnt_q; + // controlled by above FSM, only true in SendCA state + if(trx_rwds_sample_ena) + ck_cnt_d = ck_cnt_q +1; + // reset counter when the transaction ends (CS goes high) + if(hyper_cs_no) + ck_cnt_d = '0; + end + // clocked with falling edge, creates an active clk-gate around rising edge + `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_no); + + assign rwds_sample_ena = (ck_cnt_q == 2) & rwds_sample_ena_i; // TODO: Check proper sampling point in sim + + // Gate the sampling of rwds to the third rising CK_90 edge only + tc_clk_gating i_rwds_in_clk_gate ( + .clk_i ( hyper_ck_o ), + .en_i ( rwds_sample_ena ), + .test_en_i ( test_mode_i ), + .clk_o ( rwds_sample_clk ) + ); + // Sample RWDS on demand for extra latency determination + `FF(rwds_sample_o, hyper_rwds_i, '0, rwds_sample_clk); + //------------------------------------------------------------------------- + + // Set and Reset RX clock enable always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rx_delay if (~rst_ni) rx_rwds_clk_ena <= 1'b0; From a55f9763d2b5301b1a324c2c96bc67d74e742b54 Mon Sep 17 00:00:00 2001 From: phsauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 09/16] Remove phy-fsm signals from RWDS sampling Decouples the clock domain better, only the rwds_sample_o signal crosses between phy and system clk. --- src/hyperbus_trx.sv | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index 53de430..ebf803e 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -144,24 +144,23 @@ module hyperbus_trx #( // Then and only then will the sample be taken. // Constraints: - // ena_i is clocked from clk_i, ck_cnt with ~clk_90_i and - // rwds_sample_o with clk_90_i. 
- // The paths are very short and as long the clk to clk_90 - // constraints are proper (clk_90 being a derived shifted clock) - // this should not cause any problems + // As long as the clk to clk_90 constraints are proper + // (clk_90 being a derived shifted clock) this should not cause problems + always_comb begin : gen_ck_counter - ck_cnt_d = clk_cnt_q; - // controlled by above FSM, only true in SendCA state - if(trx_rwds_sample_ena) - ck_cnt_d = ck_cnt_q +1; + ck_cnt_d = ck_cnt_q +1; // count hyper_ck falling edges + // reset counter when the transaction ends (CS goes high) - if(hyper_cs_no) + if(hyper_cs_no) begin ck_cnt_d = '0; + end else if(ck_cnt_q == 3) begin // stop counting once sample is taken + ck_cnt_d = ck_cnt_q; + end end // clocked with falling edge, creates an active clk-gate around rising edge `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_no); - assign rwds_sample_ena = (ck_cnt_q == 2) & rwds_sample_ena_i; // TODO: Check proper sampling point in sim + assign rwds_sample_ena = (ck_cnt_q == 2); // TODO: Check proper sampling point in sim // Gate the sampling of rwds to the third rising CK_90 edge only tc_clk_gating i_rwds_in_clk_gate ( From abf586bcac51386ce22710c9367b54f65874cd2b Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 10/16] Refactor RWDS sampling to separate module --- Bender.yml | 1 + src/hyperbus_rwds_sampler.sv | 86 ++++++++++++++++++++++++++++++++++++ src/hyperbus_trx.sv | 49 +++++--------------- 3 files changed, 99 insertions(+), 37 deletions(-) create mode 100644 src/hyperbus_rwds_sampler.sv diff --git a/Bender.yml b/Bender.yml index e5a6717..1ea23f4 100644 --- a/Bender.yml +++ b/Bender.yml @@ -34,6 +34,7 @@ sources: - src/hyperbus_phy2r.sv - src/hyperbus_ddr_out.sv - src/hyperbus_delay.sv + - src/hyperbus_rwds_sampler.sv - src/hyperbus_trx.sv - src/hyperbus_cfg_regs.sv - src/hyperbus_phy.sv diff --git a/src/hyperbus_rwds_sampler.sv b/src/hyperbus_rwds_sampler.sv new file mode 100644 
index 0000000..1237b5f --- /dev/null +++ b/src/hyperbus_rwds_sampler.sv @@ -0,0 +1,86 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Philippe Sauter + +// This modules guarantees proper worst-case sampling of RWDS. +// RWDS may only be valid (and stable) for a single period around +// the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). +// Since there may be arbitrary pad and PCBs delays, the sampling edge +// is fully configurable (edge number and polarity). +// A gated clock that is only active around one edge is created +// and then the sample is taken only at the selected edge. +// The final sample is saved into a register in the phy clock domain. +// +// It is not possible to sample on the very first rising clock edge +// with this mechanism. +// Therefore cfg_edge_idx_i = 0 selects the first failling edge +// or the subsequent rising edge, depending on cfg_edge_pol_i. +// With this naming scheme, the default edge should be idx=1, pol=1. +// +// Constraints: +// cfg* signals are pseudostatic (set_false_path -setup or set_multicycle_path) + +`include "common_cells/registers.svh" + +module hyperbus_rwds_sampler import hyperbus_pkg::*; #() +( + // Global signals + input logic clk_i, + input logic rst_ni, + input logic test_mode_i, + + // Transciever control: facing controller + output logic rwds_sample_o, + + // Physical interace: facing HyperBus + input logic hyper_cs_ni, + input logic hyper_ck_i, + input logic hyper_ck_ni, + input logic hyper_rwds_i, + input logic hyper_rwds_oe_i +); + + // used to time the sampling of RWDS to determine additional latency + logic [2:0] ck_cnt_d, ck_cnt_q; // TODO: check in sim if this can be one less + logic rwds_sample_ena; + logic rwds_sample_clk; + + // The following guarantees a proper worst-case sampling of RWDS. 
+ // RWDS may only be valid (and stable) for a single period around + // the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). + // We create a clock gate that open just for this window from falling + // to falling edge of hyper_ck around the 3rd rising edge. + // Then and only then will the sample be taken. + + // Constraints: + // As long as the clk to clk_90 constraints are proper + // (clk_90 being a derived shifted clock) this should not cause problems + + always_comb begin : gen_ck_counter + ck_cnt_d = ck_cnt_q +1; // count hyper_ck falling edges + + // reset counter when the transaction ends (CS goes high) + if(hyper_cs_ni) begin + ck_cnt_d = '0; + end else if(ck_cnt_q == 3) begin // stop counting once sample is taken + ck_cnt_d = ck_cnt_q; + end + end + // clocked with falling edge, creates an active clk-gate around rising edge + `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_ni); + + assign rwds_sample_ena = (ck_cnt_q == 2); // TODO: Check proper sampling point in sim + + // Gate the sampling of rwds to the third rising CK_90 edge only + tc_clk_gating i_rwds_in_clk_gate ( + .clk_i ( hyper_ck_i ), + .en_i ( rwds_sample_ena ), + .test_en_i ( test_mode_i ), + .clk_o ( rwds_sample_clk ) + ); + // Sample RWDS on demand for extra latency determination + `FF(rwds_sample_o, hyper_rwds_i, '0, rwds_sample_clk); + +endmodule diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index ebf803e..fc6772b 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -136,43 +136,18 @@ module hyperbus_trx #( // RX // ======== - // The following guarantees a proper worst-case sampling of RWDS. - // RWDS may only be valid (and stable) for a single period around - // the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). - // We create a clock gate that open just for this window from falling - // to falling edge of hyper_ck around the 3rd rising edge. - // Then and only then will the sample be taken. 
- - // Constraints: - // As long as the clk to clk_90 constraints are proper - // (clk_90 being a derived shifted clock) this should not cause problems - - always_comb begin : gen_ck_counter - ck_cnt_d = ck_cnt_q +1; // count hyper_ck falling edges - - // reset counter when the transaction ends (CS goes high) - if(hyper_cs_no) begin - ck_cnt_d = '0; - end else if(ck_cnt_q == 3) begin // stop counting once sample is taken - ck_cnt_d = ck_cnt_q; - end - end - // clocked with falling edge, creates an active clk-gate around rising edge - `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_no); - - assign rwds_sample_ena = (ck_cnt_q == 2); // TODO: Check proper sampling point in sim - - // Gate the sampling of rwds to the third rising CK_90 edge only - tc_clk_gating i_rwds_in_clk_gate ( - .clk_i ( hyper_ck_o ), - .en_i ( rwds_sample_ena ), - .test_en_i ( test_mode_i ), - .clk_o ( rwds_sample_clk ) - ); - // Sample RWDS on demand for extra latency determination - `FF(rwds_sample_o, hyper_rwds_i, '0, rwds_sample_clk); - //------------------------------------------------------------------------- - + // guarantees proper worst-case sampling of RWDS + hyperbus_rwds_sampler i_rwds_sampler ( + .clk_i, + .rst_ni, + .test_mode_i, + .rwds_sample_o, + .hyper_cs_ni ( hyper_cs_no ), + .hyper_ck_i ( hyper_ck_o ), + .hyper_ck_ni ( hyper_ck_no ), + .hyper_rwds_i ( hyper_rwds_i ), + .hyper_rwds_oe_i ( ) + ) // Set and Reset RX clock enable always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rx_delay From 34d57040a1271e7d0614c06090ad68dda6ecee61 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 11/16] More configurable RWDS sampling Exact sampling edge is adjustable. 
--- src/hyperbus_rwds_sampler.sv | 102 ++++++++++++++++++++--------------- src/hyperbus_trx.sv | 23 ++++---- 2 files changed, 73 insertions(+), 52 deletions(-) diff --git a/src/hyperbus_rwds_sampler.sv b/src/hyperbus_rwds_sampler.sv index 1237b5f..b5f1a7d 100644 --- a/src/hyperbus_rwds_sampler.sv +++ b/src/hyperbus_rwds_sampler.sv @@ -27,60 +27,76 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() ( // Global signals - input logic clk_i, - input logic rst_ni, - input logic test_mode_i, - - // Transciever control: facing controller - output logic rwds_sample_o, - - // Physical interace: facing HyperBus - input logic hyper_cs_ni, - input logic hyper_ck_i, - input logic hyper_ck_ni, - input logic hyper_rwds_i, - input logic hyper_rwds_oe_i + input logic clk_i, // phy clock + input logic rst_ni, + input logic test_mode_i, + + input logic [1:0] cfg_edge_idx_i, // #edge where rwds is sampled + input logic cfg_edge_pol_i, // 1: rising, 0: falling + + // sampled value going to PHY-FSM + output logic rwds_sample_o, + + // Physical interface: facing HyperBus + input logic hyper_cs_ni, + input logic hyper_ck_i, + input logic hyper_ck_ni, + input logic hyper_rwds_i ); // used to time the sampling of RWDS to determine additional latency - logic [2:0] ck_cnt_d, ck_cnt_q; // TODO: check in sim if this can be one less - logic rwds_sample_ena; - logic rwds_sample_clk; - - // The following guarantees a proper worst-case sampling of RWDS. - // RWDS may only be valid (and stable) for a single period around - // the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). - // We create a clock gate that open just for this window from falling - // to falling edge of hyper_ck around the 3rd rising edge. - // Then and only then will the sample be taken. 
- - // Constraints: - // As long as the clk to clk_90 constraints are proper - // (clk_90 being a derived shifted clock) this should not cause problems + logic [2:0] cnt_edge_d, cnt_edge_q; // one bit larger than config + logic [2:0] cnt_target_value; + logic cnt_clk; // clock used for edge counting + logic sampling_clk, sampling_clk_gated; // clock used for sampling + logic enable_sampling; + logic rwds_sample; + + assign cnt_target_value = cfg_edge_idx_i + 1; - always_comb begin : gen_ck_counter - ck_cnt_d = ck_cnt_q +1; // count hyper_ck falling edges + always_comb begin : gen_edge_cnt + cnt_edge_d = cnt_edge_q +1; // count hyper_ck(_n) edges // reset counter when the transaction ends (CS goes high) if(hyper_cs_ni) begin - ck_cnt_d = '0; - end else if(ck_cnt_q == 3) begin // stop counting once sample is taken - ck_cnt_d = ck_cnt_q; + cnt_edge_d = '0; + end else if(cnt_edge_q == '1) begin // stop counting to avoid overflow + cnt_edge_d = cnt_edge_q; end end - // clocked with falling edge, creates an active clk-gate around rising edge - `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_ni); + // sampling on the rising edge requires counting on falling edges to create + // a window where the clk-gate is transparent around rising edge and vice versa + tc_clk_mux2 i_cnt_clk_mux ( + .clk0_i ( hyper_ck_ni ), + .clk1_i ( hyper_ck_i ), + .clk_sel_i ( ~cfg_edge_pol_i ), + .clk_o ( cnt_clk ) + ); - assign rwds_sample_ena = (ck_cnt_q == 2); // TODO: Check proper sampling point in sim + `FF(cnt_edge_q, cnt_edge_d, '0, cnt_clk); - // Gate the sampling of rwds to the third rising CK_90 edge only - tc_clk_gating i_rwds_in_clk_gate ( - .clk_i ( hyper_ck_i ), - .en_i ( rwds_sample_ena ), - .test_en_i ( test_mode_i ), - .clk_o ( rwds_sample_clk ) + // TODO: Check proper sampling point in sim + assign enable_sampling = (cnt_edge_q == cnt_target_value); + + tc_clk_mux2 i_sampling_clk_mux ( + .clk0_i ( hyper_ck_ni ), + .clk1_i ( hyper_ck_i ), + .clk_sel_i ( cfg_edge_pol_i ), + .clk_o ( 
sampling_clk ) + ); + + // gate the sampling of rwds to the correct rising clock edge + tc_clk_gating i_rwds_sample_rise_gate ( + .clk_i ( sampling_clk ), + .en_i ( enable_sampling ), + .test_en_i ( test_mode_i ), + .clk_o ( sampling_clk_gated ) ); - // Sample RWDS on demand for extra latency determination - `FF(rwds_sample_o, hyper_rwds_i, '0, rwds_sample_clk); + + // sample rwds exactly once at the correct edge + `FF(rwds_sample, hyper_rwds_i, '0, sampling_clk_gated); + + // pass rwds to phy-clock domain + `FF(rwds_sample_o, rwds_sample, '0, clk_i); endmodule diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index fc6772b..bb00782 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -13,11 +13,15 @@ module hyperbus_trx #( parameter int unsigned SyncStages = 2 )( // Global signals - input logic clk_i, - input logic clk_i_90, - input logic rst_ni, - input logic test_mode_i, - // Transciever control: facing controller + input logic clk_i, + input logic clk_i_90, + input logic rst_ni, + input logic test_mode_i, + + input logic [1:0] cfg_edge_idx_i, + input logic cfg_edge_pol_i, + + // Transceiver control: facing controller input logic [NumChips-1:0] cs_i, input logic cs_ena_i, output logic rwds_sample_o, @@ -36,7 +40,7 @@ module hyperbus_trx #( output logic [15:0] rx_data_o, output logic rx_valid_o, input logic rx_ready_i, - // Physical interace: facing HyperBus + // Physical interface: facing HyperBus output logic [NumChips-1:0] hyper_cs_no, output logic hyper_ck_o, output logic hyper_ck_no, @@ -136,17 +140,18 @@ module hyperbus_trx #( // RX // ======== - // guarantees proper worst-case sampling of RWDS + // sample RWDS for extra latency determination (adjustable sampling edge) hyperbus_rwds_sampler i_rwds_sampler ( .clk_i, .rst_ni, .test_mode_i, + .cfg_edge_idx_i, + .cfg_edge_pol_i, .rwds_sample_o, .hyper_cs_ni ( hyper_cs_no ), .hyper_ck_i ( hyper_ck_o ), .hyper_ck_ni ( hyper_ck_no ), - .hyper_rwds_i ( hyper_rwds_i ), - .hyper_rwds_oe_i ( ) + 
.hyper_rwds_i ( hyper_rwds_i ) ) // Set and Reset RX clock enable From 7c49294f5e397f881071dffa1b47f3d4a227e453 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 12/16] Delay additional latency decision, add cfg regs We want to give the RWDS sampler as much time as possible to get a value. So we delay the additional latency decision to the latest point possible. --- src/hyperbus_cfg_regs.sv | 4 ++- src/hyperbus_phy.sv | 78 +++++++++++++++++++++++++++------------- src/hyperbus_pkg.sv | 37 ++++++++++++------- src/hyperbus_trx.sv | 2 +- 4 files changed, 81 insertions(+), 40 deletions(-) diff --git a/src/hyperbus_cfg_regs.sv b/src/hyperbus_cfg_regs.sv index 70ac78e..680aa67 100644 --- a/src/hyperbus_cfg_regs.sv +++ b/src/hyperbus_cfg_regs.sv @@ -30,7 +30,7 @@ module hyperbus_cfg_regs #( `include "common_cells/registers.svh" // Internal Parameters - localparam int unsigned NumBaseRegs = 12; + localparam int unsigned NumBaseRegs = 13; localparam int unsigned NumRegs = 2*NumChips + NumBaseRegs; localparam int unsigned RegsBits = cf_math_pkg::idx_width(NumRegs); localparam int unsigned RegStrbWidth = RegDataWidth/8; @@ -59,6 +59,7 @@ module hyperbus_cfg_regs #( if (sel_reg_mapped) begin rfield = { crange_q, + reg_data_t'(cfg_q.rwds_sample_edge), reg_data_t'(cfg_q.csn_to_ck_cycles), reg_data_t'(cfg_q.t_csh_cycles), reg_data_t'(cfg_q.which_phy), @@ -101,6 +102,7 @@ module hyperbus_cfg_regs #( 'h9: cfg_d.which_phy = (NumPhys==1) ? 
0 : ( (~wmask & cfg_q.which_phy ) | (wmask & reg_req_i.wdata) ); 'ha: cfg_d.t_csh_cycles = (~wmask & cfg_q.t_csh_cycles ) | (wmask & reg_req_i.wdata); 'hb: cfg_d.csn_to_ck_cycles = (~wmask & cfg_q.csn_to_ck_cycles ) | (wmask & reg_req_i.wdata); + 'hc: cfg_d.rwds_sample_edge = (~wmask & cfg_q.rwds_sample_edge ) | (wmask & reg_req_i.wdata); default: begin {sel_chip, chip_reg} = sel_reg - NumBaseRegs; crange_d[sel_chip][chip_reg] = (~wmask & crange_q[sel_chip][chip_reg]) | (wmask & reg_req_i.wdata); diff --git a/src/hyperbus_phy.sv b/src/hyperbus_phy.sv index e9229fe..47e6420 100644 --- a/src/hyperbus_phy.sv +++ b/src/hyperbus_phy.sv @@ -123,22 +123,24 @@ module hyperbus_phy import hyperbus_pkg::*; #( .clk_i_90, .rst_ni, .test_mode_i, - .cs_i ( cs_q ), - .cs_ena_i ( trx_cs_ena ), - .rwds_sample_o ( trx_rwds_sample ), - .rwds_sample_ena_i ( trx_rwds_sample_ena ), - .tx_clk_delay_i ( cfg_i.t_tx_clk_delay ), - .tx_clk_ena_i ( trx_clk_ena ), - .tx_data_i ( trx_tx_data ), - .tx_data_oe_i ( trx_tx_data_oe ), - .tx_rwds_i ( trx_tx_rwds ), - .tx_rwds_oe_i ( trx_tx_rwds_oe ), - .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), - .rx_clk_set_i ( trx_rx_clk_set ), - .rx_clk_reset_i ( trx_rx_clk_reset ), - .rx_data_o ( trx_rx_data ), - .rx_valid_o ( trx_rx_valid ), - .rx_ready_i ( trx_rx_ready ), + .cfg_edge_idx_i ( cfg_i.rwds_sample_edge.cylce_idx ), + .cfg_edge_pol_i ( cfg_i.rwds_sample_edge.polarity ), + .cs_i ( cs_q ), + .cs_ena_i ( trx_cs_ena ), + .rwds_sample_o ( trx_rwds_sample ), + .rwds_sample_ena_i ( trx_rwds_sample_ena ), + .tx_clk_delay_i ( cfg_i.t_tx_clk_delay ), + .tx_clk_ena_i ( trx_clk_ena ), + .tx_data_i ( trx_tx_data ), + .tx_data_oe_i ( trx_tx_data_oe ), + .tx_rwds_i ( trx_tx_rwds ), + .tx_rwds_oe_i ( trx_tx_rwds_oe ), + .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), + .rx_clk_set_i ( trx_rx_clk_set ), + .rx_clk_reset_i ( trx_rx_clk_reset ), + .rx_data_o ( trx_rx_data ), + .rx_valid_o ( trx_rx_valid ), + .rx_ready_i ( trx_rx_ready ), .hyper_cs_no, .hyper_ck_o, .hyper_ck_no, 
@@ -271,7 +273,7 @@ module hyperbus_phy import hyperbus_pkg::*; #( cs_d = trans_cs_i; if(cfg_i.csn_to_ck_cycles != 0) begin - // asser CS but delay hyper_ck to allow more time + // assert CS but delay hyper_ck to allow more time // for memory to drive RWDS (to satisfy t_DSV) state_d = DelayCK; timer_d = cfg_i.csn_to_ck_cycles -1; @@ -305,7 +307,7 @@ module hyperbus_phy import hyperbus_pkg::*; #( timer_d = cfg_i.t_burst_max; state_d = Write; end else begin - timer_d = TimerWidth'(cfg_i.t_latency_access) << ctl_add_latency; + timer_d = TimerWidth'(cfg_i.t_latency_access); state_d = WaitLatAccess; end end @@ -313,18 +315,44 @@ module hyperbus_phy import hyperbus_pkg::*; #( WaitLatAccess: begin trx_clk_ena = 1'b1; trx_tx_data_oe = 1'b1; - // Substract cycle for last CA and another for state delay + // ctl_add_latency may arrive at any time (adjustable RWDS sampling) + // If no additional latency required: + if (~ctl_add_latency) begin + // Substract cycle for last CA and another for state delay + if(ctl_timer_two) begin + timer_d = cfg_i.t_burst_max; + // Switch to write or read phase and already start + // turnaround of tri-state driver (depending on latency + // config and if read or write transaction). + if (tf_q.write) begin + state_d = Write; + trx_tx_data_oe = 1'b1; + // For zero latency writes, we must not drive the RWDS + // signal (see specs page 9). Depending on the latency + // mode we thus drive only the DQ signals or DQ + RWDS. 
+ trx_tx_rwds_oe = ~ctl_write_zero_lat; + end else begin + state_d = Read; + trx_tx_data_oe = 1'b0; + trx_tx_rwds_oe = 1'b0; + end + end + end else if (ctl_timer_one) begin + // instead of going to 0, add another latency count + state_d = WaitAddLatAccess; + timer_d = TimerWidth'(cfg_i.t_latency_access); + end + end + WaitAddLatAccess: begin + // Same as WaitLatAccess but without possibility + // of adding another latency count + trx_clk_ena = 1'b1; + trx_tx_data_oe = 1'b1; if (ctl_timer_two) begin timer_d = cfg_i.t_burst_max; - // Switch to write or read phase and already start - // turnaround of tri-state driver (depending on latency - // config and if read or write transaction). if (tf_q.write) begin state_d = Write; trx_tx_data_oe = 1'b1; - // For zero latency writes, we must not drive the RWDS - // signal (see specs page 9). Depending on the latency - // mode we thus drive only the DQ signals or DQ + RWDS. trx_tx_rwds_oe = ~ctl_write_zero_lat; end else begin state_d = Read; diff --git a/src/hyperbus_pkg.sv b/src/hyperbus_pkg.sv index 31e0482..cf9126b 100644 --- a/src/hyperbus_pkg.sv +++ b/src/hyperbus_pkg.sv @@ -8,20 +8,27 @@ package hyperbus_pkg; localparam unsigned HyperBurstWidth = 8 + $clog2(1024/16) + 1; typedef logic [HyperBurstWidth-1:0] hyper_blen_t; + + typedef struct packed { + logic [1:0] cylce_idx; + logic polarity; + } hyper_cfg_rwds_t; + // configuration type typedef struct packed { - logic [3:0] t_latency_access; - logic en_latency_additional; - logic [15:0] t_burst_max; - logic [3:0] t_read_write_recovery; - logic [3:0] t_rx_clk_delay; - logic [3:0] t_tx_clk_delay; - logic [4:0] address_mask_msb; - logic address_space; - logic phys_in_use; - logic which_phy; - logic [3:0] t_csh_cycles; // add an configurable Tcsh for high freq operation(200MHz Hyperram) - logic [3:0] csn_to_ck_cycles; // delay hyper_ck after CS is asserted (more time for t_DSV) + logic [3:0] t_latency_access; + logic en_latency_additional; + logic [15:0] t_burst_max; + logic 
[3:0] t_read_write_recovery; + logic [3:0] t_rx_clk_delay; + logic [3:0] t_tx_clk_delay; + logic [4:0] address_mask_msb; + logic address_space; + logic phys_in_use; + logic which_phy; + logic [3:0] t_csh_cycles; // add an configurable Tcsh for high freq operation(200MHz Hyperram) + logic [3:0] csn_to_ck_cycles; // delay hyper_ck after CS is asserted (more time for t_DSV) + hyper_cfg_rwds_t rwds_sample_edge; } hyper_cfg_t; typedef struct packed { @@ -44,6 +51,7 @@ package hyperbus_pkg; DelayCK, SendCA, WaitLatAccess, + WaitAddLatAccess, Read, Write, WaitXfer, @@ -77,7 +85,10 @@ package hyperbus_pkg; phys_in_use: NumPhys-1, which_phy: NumPhys-1, t_csh_cycles: 'h1, - csn_to_ck_cycles: 'h2 + csn_to_ck_cycles: 'h2, + rwds_sample_edge: hyper_cfg_rwds_t'{ // third rising edge, see hyperbus_rwds_sampler + cylce_idx: 'h1, + polarity: 'b1 } }; return cfg; diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index bb00782..0a15286 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -152,7 +152,7 @@ module hyperbus_trx #( .hyper_ck_i ( hyper_ck_o ), .hyper_ck_ni ( hyper_ck_no ), .hyper_rwds_i ( hyper_rwds_i ) - ) + ); // Set and Reset RX clock enable always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rx_delay From 8c986b50e899a51a6ee32c558d8f5d920eee0df9 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 13/16] Fix reset of rwds sampling counter The reset was not being triggered since the gated clock stops before chip select goes high. A sticky bit driven by the ungated clock is used to indicate start of transfer. The counter reaching the target value is used to reset the sticky bit. Counter only counts while it is set (when the transfer starts until the target is reached). 
--- src/hyperbus_rwds_sampler.sv | 41 +++++++++++++++++++++++++----------- src/hyperbus_trx.sv | 3 ++- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/hyperbus_rwds_sampler.sv b/src/hyperbus_rwds_sampler.sv index b5f1a7d..01dac40 100644 --- a/src/hyperbus_rwds_sampler.sv +++ b/src/hyperbus_rwds_sampler.sv @@ -37,7 +37,10 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() // sampled value going to PHY-FSM output logic rwds_sample_o, - // Physical interface: facing HyperBus + // ungated hyperbus clock + input logic tx_clk_90_i, + + // physical HyperBus signals input logic hyper_cs_ni, input logic hyper_ck_i, input logic hyper_ck_ni, @@ -46,22 +49,24 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() // used to time the sampling of RWDS to determine additional latency logic [2:0] cnt_edge_d, cnt_edge_q; // one bit larger than config + logic start_of_tf_d, start_of_tf_q; // start of transfer indicator logic [2:0] cnt_target_value; + logic cnt_at_target; logic cnt_clk; // clock used for edge counting logic sampling_clk, sampling_clk_gated; // clock used for sampling - logic enable_sampling; + logic enable_sampling; // sampling clock gate enable logic rwds_sample; assign cnt_target_value = cfg_edge_idx_i + 1; + assign cnt_at_target = (cnt_target_value == cnt_edge_q); always_comb begin : gen_edge_cnt - cnt_edge_d = cnt_edge_q +1; // count hyper_ck(_n) edges - - // reset counter when the transaction ends (CS goes high) - if(hyper_cs_ni) begin - cnt_edge_d = '0; - end else if(cnt_edge_q == '1) begin // stop counting to avoid overflow - cnt_edge_d = cnt_edge_q; + // only count at the start of a transfer + if(start_of_tf_q) begin + cnt_edge_d = cnt_edge_q +1; // count hyper_ck(_n) edges + end else begin + // reset counter for next start of transfer + cnt_edge_d = 1'b0; end end // sampling on the rising edge requires counting on falling edges to create @@ -75,8 +80,20 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() 
`FF(cnt_edge_q, cnt_edge_d, '0, cnt_clk); + // used to reset counter and ensure clock gate opens only once + // clocked with ungated clock to detect cs_n going high + always_comb begin : gen_start_of_transfer + start_of_tf_d = start_of_tf_q; + if(hyper_cs_ni) begin + start_of_tf_d = 1'b1; + end else if (cnt_at_target) begin + start_of_tf_d = 1'b0; + end + end + `FF(start_of_tf_q, start_of_tf_d, '0, tx_clk_90_i); + // TODO: Check proper sampling point in sim - assign enable_sampling = (cnt_edge_q == cnt_target_value); + assign enable_sampling = (cnt_at_target && start_of_tf_q); tc_clk_mux2 i_sampling_clk_mux ( .clk0_i ( hyper_ck_ni ), @@ -85,7 +102,7 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() .clk_o ( sampling_clk ) ); - // gate the sampling of rwds to the correct rising clock edge + // gate the sampling of rwds to the selected clock edge tc_clk_gating i_rwds_sample_rise_gate ( .clk_i ( sampling_clk ), .en_i ( enable_sampling ), @@ -93,7 +110,7 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() .clk_o ( sampling_clk_gated ) ); - // sample rwds exactly once at the correct edge + // sample rwds exactly once using gated clock `FF(rwds_sample, hyper_rwds_i, '0, sampling_clk_gated); // pass rwds to phy-clock domain diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index 0a15286..f586774 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -148,7 +148,8 @@ module hyperbus_trx #( .cfg_edge_idx_i, .cfg_edge_pol_i, .rwds_sample_o, - .hyper_cs_ni ( hyper_cs_no ), + .tx_clk_90_i ( tx_clk_90 ), + .hyper_cs_ni ( &hyper_cs_no ), .hyper_ck_i ( hyper_ck_o ), .hyper_ck_ni ( hyper_ck_no ), .hyper_rwds_i ( hyper_rwds_i ) From 1983836b2405aee2a6f0128ef1b400b765758cbe Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 14/16] Sample RWDS relative to CS edge not clock start --- src/hyperbus_cfg_regs.sv | 4 +- src/hyperbus_phy.sv | 38 ++++++++---------- src/hyperbus_pkg.sv | 14 +++---- 
src/hyperbus_rwds_sampler.sv | 78 +++++++++++++++--------------------- src/hyperbus_trx.sv | 15 ++----- 5 files changed, 62 insertions(+), 87 deletions(-) diff --git a/src/hyperbus_cfg_regs.sv b/src/hyperbus_cfg_regs.sv index 680aa67..bafb747 100644 --- a/src/hyperbus_cfg_regs.sv +++ b/src/hyperbus_cfg_regs.sv @@ -59,7 +59,7 @@ module hyperbus_cfg_regs #( if (sel_reg_mapped) begin rfield = { crange_q, - reg_data_t'(cfg_q.rwds_sample_edge), + reg_data_t'(cfg_q.rwds_sample), reg_data_t'(cfg_q.csn_to_ck_cycles), reg_data_t'(cfg_q.t_csh_cycles), reg_data_t'(cfg_q.which_phy), @@ -102,7 +102,7 @@ module hyperbus_cfg_regs #( 'h9: cfg_d.which_phy = (NumPhys==1) ? 0 : ( (~wmask & cfg_q.which_phy ) | (wmask & reg_req_i.wdata) ); 'ha: cfg_d.t_csh_cycles = (~wmask & cfg_q.t_csh_cycles ) | (wmask & reg_req_i.wdata); 'hb: cfg_d.csn_to_ck_cycles = (~wmask & cfg_q.csn_to_ck_cycles ) | (wmask & reg_req_i.wdata); - 'hc: cfg_d.rwds_sample_edge = (~wmask & cfg_q.rwds_sample_edge ) | (wmask & reg_req_i.wdata); + 'hc: cfg_d.rwds_sample = (~wmask & cfg_q.rwds_sample ) | (wmask & reg_req_i.wdata); default: begin {sel_chip, chip_reg} = sel_reg - NumBaseRegs; crange_d[sel_chip][chip_reg] = (~wmask & crange_q[sel_chip][chip_reg]) | (wmask & reg_req_i.wdata); diff --git a/src/hyperbus_phy.sv b/src/hyperbus_phy.sv index 47e6420..43e4e57 100644 --- a/src/hyperbus_phy.sv +++ b/src/hyperbus_phy.sv @@ -98,7 +98,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( logic trx_clk_ena; logic trx_cs_ena; logic trx_rwds_sample; - logic trx_rwds_sample_ena; logic [15:0] trx_tx_data; logic trx_tx_data_oe; logic [1:0] trx_tx_rwds; @@ -123,24 +122,23 @@ module hyperbus_phy import hyperbus_pkg::*; #( .clk_i_90, .rst_ni, .test_mode_i, - .cfg_edge_idx_i ( cfg_i.rwds_sample_edge.cylce_idx ), - .cfg_edge_pol_i ( cfg_i.rwds_sample_edge.polarity ), - .cs_i ( cs_q ), - .cs_ena_i ( trx_cs_ena ), - .rwds_sample_o ( trx_rwds_sample ), - .rwds_sample_ena_i ( trx_rwds_sample_ena ), - .tx_clk_delay_i ( 
cfg_i.t_tx_clk_delay ), - .tx_clk_ena_i ( trx_clk_ena ), - .tx_data_i ( trx_tx_data ), - .tx_data_oe_i ( trx_tx_data_oe ), - .tx_rwds_i ( trx_tx_rwds ), - .tx_rwds_oe_i ( trx_tx_rwds_oe ), - .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), - .rx_clk_set_i ( trx_rx_clk_set ), - .rx_clk_reset_i ( trx_rx_clk_reset ), - .rx_data_o ( trx_rx_data ), - .rx_valid_o ( trx_rx_valid ), - .rx_ready_i ( trx_rx_ready ), + .cfg_edge_idx_i ( cfg_i.rwds_sample.cylce_idx ), + .cfg_edge_pol_i ( cfg_i.rwds_sample.polarity ), + .cs_i ( cs_q ), + .cs_ena_i ( trx_cs_ena ), + .rwds_sample_o ( trx_rwds_sample ), + .tx_clk_delay_i ( cfg_i.t_tx_clk_delay ), + .tx_clk_ena_i ( trx_clk_ena ), + .tx_data_i ( trx_tx_data ), + .tx_data_oe_i ( trx_tx_data_oe ), + .tx_rwds_i ( trx_tx_rwds ), + .tx_rwds_oe_i ( trx_tx_rwds_oe ), + .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), + .rx_clk_set_i ( trx_rx_clk_set ), + .rx_clk_reset_i ( trx_rx_clk_reset ), + .rx_data_o ( trx_rx_data ), + .rx_valid_o ( trx_rx_valid ), + .rx_ready_i ( trx_rx_ready ), .hyper_cs_no, .hyper_ck_o, .hyper_ck_no, @@ -245,7 +243,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( trx_cs_ena = 1'b1; trx_clk_ena = 1'b0; trx_rx_clk_set = 1'b0; - trx_rwds_sample_ena = 1'b0; // Default next state state_d = state_q; timer_d = timer_q - 1; @@ -301,7 +298,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( // Dataflow handled outside FSM trx_clk_ena = 1'b1; trx_tx_data_oe = 1'b1; - trx_rwds_sample_ena = ~ctl_write_zero_lat; if (ctl_timer_zero) begin if (ctl_write_zero_lat) begin timer_d = cfg_i.t_burst_max; diff --git a/src/hyperbus_pkg.sv b/src/hyperbus_pkg.sv index cf9126b..e1f90e6 100644 --- a/src/hyperbus_pkg.sv +++ b/src/hyperbus_pkg.sv @@ -10,8 +10,8 @@ package hyperbus_pkg; typedef struct packed { - logic [1:0] cylce_idx; - logic polarity; + logic [3:0] cylce_idx; // number of cycles passed when sampling should occur + logic polarity; // 1: rising, 0: falling } hyper_cfg_rwds_t; // configuration type @@ -28,7 +28,7 @@ package hyperbus_pkg; 
logic which_phy; logic [3:0] t_csh_cycles; // add an configurable Tcsh for high freq operation(200MHz Hyperram) logic [3:0] csn_to_ck_cycles; // delay hyper_ck after CS is asserted (more time for t_DSV) - hyper_cfg_rwds_t rwds_sample_edge; + hyper_cfg_rwds_t rwds_sample; } hyper_cfg_t; typedef struct packed { @@ -85,10 +85,10 @@ package hyperbus_pkg; phys_in_use: NumPhys-1, which_phy: NumPhys-1, t_csh_cycles: 'h1, - csn_to_ck_cycles: 'h2, - rwds_sample_edge: hyper_cfg_rwds_t'{ // third rising edge, see hyperbus_rwds_sampler - cylce_idx: 'h1, - polarity: 'b1 } + csn_to_ck_cycles: 'h0, // additional cycles from CS_N going low to start of hyper_ck + rwds_sample: hyper_cfg_rwds_t'{ // hyper_ck edge for RWDS sampling relative to CS_N going low + cylce_idx: 'h2, // cycle number after CS_N going low (first falling and rising edge is idx=0) + polarity: 'b1 } // 0: falling, 1:rising -> first edge after CS_N is a falling edge }; return cfg; diff --git a/src/hyperbus_rwds_sampler.sv b/src/hyperbus_rwds_sampler.sv index 01dac40..3c6927b 100644 --- a/src/hyperbus_rwds_sampler.sv +++ b/src/hyperbus_rwds_sampler.sv @@ -31,76 +31,62 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() input logic rst_ni, input logic test_mode_i, - input logic [1:0] cfg_edge_idx_i, // #edge where rwds is sampled + input logic [3:0] cfg_edge_idx_i, // #edge where rwds is sampled input logic cfg_edge_pol_i, // 1: rising, 0: falling // sampled value going to PHY-FSM output logic rwds_sample_o, - // ungated hyperbus clock - input logic tx_clk_90_i, - // physical HyperBus signals input logic hyper_cs_ni, - input logic hyper_ck_i, - input logic hyper_ck_ni, input logic hyper_rwds_i ); // used to time the sampling of RWDS to determine additional latency - logic [2:0] cnt_edge_d, cnt_edge_q; // one bit larger than config - logic start_of_tf_d, start_of_tf_q; // start of transfer indicator - logic [2:0] cnt_target_value; - logic cnt_at_target; + logic tx_clk_180; // inverted clock + logic [4:0] 
cnt_edge_d, cnt_edge_q; // one bit larger than config logic cnt_clk; // clock used for edge counting logic sampling_clk, sampling_clk_gated; // clock used for sampling logic enable_sampling; // sampling clock gate enable logic rwds_sample; - assign cnt_target_value = cfg_edge_idx_i + 1; - assign cnt_at_target = (cnt_target_value == cnt_edge_q); - + // generate and select clocks + // Sampling is either clocked by un-inverted or inverted 90deg hyperbus clock + // Counter is clocked by the inverse as it controls the clock gate + // which should be on for one cycle with sampling edge in the middle + tc_clk_inverter i_tx_clk_inv ( + .clk_i ( clk_i ), + .clk_o ( tx_clk_180 ) + ); + + tc_clk_mux2 i_sampling_clk_mux ( + .clk0_i ( tx_clk_180 ), + .clk1_i ( clk_i ), + .clk_sel_i ( cfg_edge_pol_i ), + .clk_o ( sampling_clk ) + ); + + tc_clk_inverter i_edge_cnt_clk_inv ( + .clk_i ( sampling_clk ), + .clk_o ( cnt_clk ) + ); + always_comb begin : gen_edge_cnt - // only count at the start of a transfer - if(start_of_tf_q) begin - cnt_edge_d = cnt_edge_q +1; // count hyper_ck(_n) edges + // only count during transfers + if(~hyper_cs_ni) begin + cnt_edge_d = cnt_edge_q +1; + if(cnt_edge_q == '1) begin + cnt_edge_d = cnt_edge_q; // saturating counter + end end else begin - // reset counter for next start of transfer + // reset counter for next transfer cnt_edge_d = 1'b0; end end - // sampling on the rising edge requires counting on falling edges to create - // a window where the clk-gate is transparent around rising edge and vice versa - tc_clk_mux2 i_cnt_clk_mux ( - .clk0_i ( hyper_ck_ni ), - .clk1_i ( hyper_ck_i ), - .clk_sel_i ( ~cfg_edge_pol_i ), - .clk_o ( cnt_clk ) - ); `FF(cnt_edge_q, cnt_edge_d, '0, cnt_clk); - // used to reset counter and ensure clock gate opens only once - // clocked with ungated clock to detect cs_n going high - always_comb begin : gen_start_of_transfer - start_of_tf_d = start_of_tf_q; - if(hyper_cs_ni) begin - start_of_tf_d = 1'b1; - end else if 
(cnt_at_target) begin - start_of_tf_d = 1'b0; - end - end - `FF(start_of_tf_q, start_of_tf_d, '0, tx_clk_90_i); - - // TODO: Check proper sampling point in sim - assign enable_sampling = (cnt_at_target && start_of_tf_q); - - tc_clk_mux2 i_sampling_clk_mux ( - .clk0_i ( hyper_ck_ni ), - .clk1_i ( hyper_ck_i ), - .clk_sel_i ( cfg_edge_pol_i ), - .clk_o ( sampling_clk ) - ); + assign enable_sampling = (cnt_edge_q == cfg_edge_idx_i) & ~hyper_cs_ni; // gate the sampling of rwds to the selected clock edge tc_clk_gating i_rwds_sample_rise_gate ( diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index f586774..098935b 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -18,14 +18,13 @@ module hyperbus_trx #( input logic rst_ni, input logic test_mode_i, - input logic [1:0] cfg_edge_idx_i, + input logic [3:0] cfg_edge_idx_i, input logic cfg_edge_pol_i, // Transceiver control: facing controller input logic [NumChips-1:0] cs_i, input logic cs_ena_i, output logic rwds_sample_o, - input logic rwds_sample_ena_i, input logic [3:0] tx_clk_delay_i, input logic tx_clk_ena_i, @@ -69,11 +68,6 @@ module hyperbus_trx #( logic rx_rwds_fifo_valid; logic rx_rwds_fifo_ready; - // used to time the sampling of RWDS to determine additional latency - logic [2:0] ck_cnt_d, ck_cnt_q; // TODO: check in sim if this can be one less - logic rwds_sample_ena; - logic rwds_sample_clk; - // Feed through async reset assign hyper_reset_no = rst_ni; @@ -85,6 +79,8 @@ module hyperbus_trx #( assign tx_clk_90 = clk_i_90; // 90deg-shifted differential output clock, sampling output bytes centrally + // TODO: tx_clk_ena_q to tx_clk_90 may need a constraint at the pins of this module + // specifically tx_clk_ena_q must arrive BEFORE tx_clk_90 otherwise the gating may fail hyperbus_clock_diff_out i_clock_diff_out ( .in_i ( tx_clk_90 ), .en_i ( tx_clk_ena_q ), @@ -93,7 +89,7 @@ module hyperbus_trx #( ); // Synchronize output chip select to shifted differential output clock - always_ff @(posedge 
tx_clk_90 or negedge rst_ni) begin : proc_ff_tx_shift90 + always_ff @(negedge clk_i or negedge rst_ni) begin : proc_ff_tx_shift90 if (~rst_ni) hyper_cs_no <= '1; else hyper_cs_no <= cs_ena_i ? ~cs_i : '1; end @@ -148,10 +144,7 @@ module hyperbus_trx #( .cfg_edge_idx_i, .cfg_edge_pol_i, .rwds_sample_o, - .tx_clk_90_i ( tx_clk_90 ), .hyper_cs_ni ( &hyper_cs_no ), - .hyper_ck_i ( hyper_ck_o ), - .hyper_ck_ni ( hyper_ck_no ), .hyper_rwds_i ( hyper_rwds_i ) ); From 141fca8ff8a315003a8dcb206e1a7356aea012aa Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:45 +0100 Subject: [PATCH 15/16] Implement Xilinx delay lines Adds the Xilinx/Genesys2 delay lines for 200MHz operation. --- Bender.yml | 4 + models/configurable_delay.behav.sv | 11 ++- models/configurable_delay.fpga.sv | 2 - src/hyperbus.sv | 116 +++++++++++++-------------- src/hyperbus_clk_gen.sv | 3 + src/hyperbus_clock_diff_out.sv | 3 + src/hyperbus_delay.sv | 13 +-- src/hyperbus_phy.sv | 2 - src/hyperbus_phy_if.sv | 66 ++++++++++----- src/hyperbus_pkg.sv | 6 +- src/hyperbus_rwds_sampler.sv | 3 + src/hyperbus_trx.sv | 27 +++++-- target/xilinx/hyperbus_clk_delay.sv | 83 +++++++++++++++++++ target/xilinx/hyperbus_rwds_delay.sv | 50 ++++++++++++ test/dut_if.sv | 38 ++++----- 15 files changed, 304 insertions(+), 123 deletions(-) create mode 100644 target/xilinx/hyperbus_clk_delay.sv create mode 100644 target/xilinx/hyperbus_rwds_delay.sv diff --git a/Bender.yml b/Bender.yml index 1ea23f4..e8af3d2 100644 --- a/Bender.yml +++ b/Bender.yml @@ -40,6 +40,10 @@ sources: - src/hyperbus_phy.sv - src/hyperbus_phy_if.sv - src/hyperbus_axi.sv + - target: xilinx + files: + - target/xilinx/hyperbus_clk_delay.sv + - target/xilinx/hyperbus_rwds_delay.sv - target: hyper_test files: # Device models.
TODO: extend diff --git a/models/configurable_delay.behav.sv b/models/configurable_delay.behav.sv index f000ede..bc445ab 100644 --- a/models/configurable_delay.behav.sv +++ b/models/configurable_delay.behav.sv @@ -13,8 +13,6 @@ // Automatically generated by the Generic Delay generator. `timescale 1ps/1ps -(* no_ungroup *) -(* no_boundary_optimization *) module configurable_delay #( parameter int unsigned NUM_STEPS, // The desired number of delay taps. Must be // a power of 2. Don't use very large values @@ -23,17 +21,18 @@ module configurable_delay #( // will not work. localparam DELAY_SEL_WIDTH = $clog2(NUM_STEPS) ) ( - input logic clk_i, - input logic enable_i, + input logic clk_i, input logic [DELAY_SEL_WIDTH-1:0] delay_i, output logic clk_o ); - logic enable_latched; logic clk; assign clk = clk_i; - always @(clk) clk_o <= #(real'(delay_i)*3.750ns/15) clk; + // The standard delay line is expected to have 32 taps with ~78ps per tap + // This conforms to the Xilinx IDELAYE2 with a 200MHz reference clock + // The total delay range is thus ~2.5ns + always @(clk) clk_o <= #(real'(delay_i)*78ps + 10ps) clk; endmodule diff --git a/models/configurable_delay.fpga.sv b/models/configurable_delay.fpga.sv index 11be16c..2b6bcfd 100644 --- a/models/configurable_delay.fpga.sv +++ b/models/configurable_delay.fpga.sv @@ -11,8 +11,6 @@ `timescale 1ps/1ps -(* no_ungroup *) -(* no_boundary_optimization *) module configurable_delay #( parameter int unsigned NUM_STEPS, // The desired number of delay taps. Must be // a power of 2. 
Don't use very large values diff --git a/src/hyperbus.sv b/src/hyperbus.sv index 7b4a005..da287fa 100644 --- a/src/hyperbus.sv +++ b/src/hyperbus.sv @@ -7,29 +7,29 @@ // Luca Valente module hyperbus #( - parameter int unsigned NumChips = -1, - parameter int unsigned NumPhys = 2, - parameter int unsigned IsClockODelayed = 0, - parameter int unsigned AxiAddrWidth = -1, - parameter int unsigned AxiDataWidth = -1, - parameter int unsigned AxiIdWidth = -1, - parameter int unsigned AxiUserWidth = -1, - parameter type axi_req_t = logic, - parameter type axi_rsp_t = logic, - parameter type axi_w_chan_t = logic, - parameter type axi_b_chan_t = logic, - parameter type axi_ar_chan_t = logic, - parameter type axi_r_chan_t = logic, - parameter type axi_aw_chan_t = logic, - parameter int unsigned RegAddrWidth = -1, - parameter int unsigned RegDataWidth = -1, + parameter int unsigned NumChips = -1, + parameter int unsigned NumPhys = 2, + parameter bit UsePhyClkDivider = 1, + parameter int unsigned AxiAddrWidth = -1, + parameter int unsigned AxiDataWidth = -1, + parameter int unsigned AxiIdWidth = -1, + parameter int unsigned AxiUserWidth = -1, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic, + parameter type axi_w_chan_t = logic, + parameter type axi_b_chan_t = logic, + parameter type axi_ar_chan_t = logic, + parameter type axi_r_chan_t = logic, + parameter type axi_aw_chan_t = logic, + parameter int unsigned RegAddrWidth = -1, + parameter int unsigned RegDataWidth = -1, parameter int unsigned MinFreqMHz = 100, - parameter type reg_req_t = logic, - parameter type reg_rsp_t = logic, - parameter type axi_rule_t = logic, + parameter type reg_req_t = logic, + parameter type reg_rsp_t = logic, + parameter type axi_rule_t = logic, // The below have sensible defaults, but should be set on integration! 
- parameter int unsigned RxFifoLogDepth = 3, - parameter int unsigned TxFifoLogDepth = 3, + parameter int unsigned RxFifoLogDepth = 3, + parameter int unsigned TxFifoLogDepth = 3, parameter logic [RegDataWidth-1:0] RstChipBase = 'h0, // Base address for all chips parameter logic [RegDataWidth-1:0] RstChipSpace = 'h1_0000, // 64 KiB: Current maximum HyperBus device size parameter hyperbus_pkg::hyper_cfg_t RstCfg = hyperbus_pkg::gen_RstCfg(NumPhys,MinFreqMHz), @@ -37,6 +37,9 @@ module hyperbus #( parameter int unsigned SyncStages = 2 ) ( input logic clk_phy_i, +`ifdef TARGET_XILINX + input logic clk_ref200_i, // only used for Xilinx delay lines +`endif input logic rst_phy_ni, input logic clk_sys_i, input logic rst_sys_ni, @@ -79,7 +82,7 @@ module hyperbus #( } tf_cdc_t; - logic clk_phy_i_0, clk_phy_i_90, rst_phy; + logic clk_phy_0, clk_phy_90, rst_phy; // Register file hyperbus_pkg::hyper_cfg_t cfg; @@ -178,18 +181,37 @@ module hyperbus #( .trans_active_o ( trans_active ) ); + if(UsePhyClkDivider == 1'b1) begin : clock_generator + hyperbus_clk_gen ddr_clk ( + .clk_i ( clk_phy_i ), + .rst_ni ( rst_phy_ni ), + .clk0_o ( clk_phy_0 ), + .clk90_o ( clk_phy_90 ), + .clk180_o ( ), + .clk270_o ( ), + .rst_no ( rst_phy ) + ); + end else begin + assign clk_phy_0 = clk_phy_i; + assign clk_phy_90 = '0; + assign rst_phy = rst_phy_ni; + end + hyperbus_phy_if #( - .IsClockODelayed( IsClockODelayed ), - .NumChips ( NumChips ), - .StartupCycles ( PhyStartupCycles ), - .NumPhys ( NumPhys ), - .hyper_rx_t ( hyper_rx_t ), - .hyper_tx_t ( hyper_tx_t ), - .SyncStages ( SyncStages ) + .UsePhyClkDivider ( UsePhyClkDivider ), + .NumChips ( NumChips ), + .NumPhys ( NumPhys ), + .StartupCycles ( PhyStartupCycles ), + .hyper_rx_t ( hyper_rx_t ), + .hyper_tx_t ( hyper_tx_t ), + .SyncStages ( SyncStages ) ) i_phy ( - .clk_i ( clk_phy_i_0 ), - .clk_i_90 ( clk_phy_i_90 ), - .rst_ni ( rst_phy ), + .clk_phy_i ( clk_phy_0 ), + .clk_phy_i_90 ( clk_phy_90 ), +`ifdef TARGET_XILINX + .clk_ref200_i ( 
clk_ref200_i ), +`endif + .rst_phy_ni ( rst_phy ), .test_mode_i ( test_mode_i ), .cfg_i ( cfg ), @@ -230,7 +252,7 @@ module hyperbus #( .src_ready_o ( axi_trans_ready ), .dst_rst_ni ( rst_phy ), - .dst_clk_i ( clk_phy_i_0 ), + .dst_clk_i ( clk_phy_0 ), .dst_data_o ( phy_tf_cdc ), .dst_valid_o ( phy_trans_valid ), .dst_ready_i ( phy_trans_ready ) @@ -240,7 +262,7 @@ module hyperbus #( .T ( logic ) ) i_cdc_2phase_b ( .src_rst_ni ( rst_phy ), - .src_clk_i ( clk_phy_i_0 ), + .src_clk_i ( clk_phy_0 ), .src_data_i ( phy_b_error ), .src_valid_i ( phy_b_valid ), .src_ready_o ( phy_b_ready ), @@ -264,7 +286,7 @@ module hyperbus #( .src_ready_o ( axi_tx_ready ), .dst_rst_ni ( rst_phy ), - .dst_clk_i ( clk_phy_i_0 ), + .dst_clk_i ( clk_phy_0 ), .dst_data_o ( phy_tx ), .dst_valid_o ( phy_tx_valid ), .dst_ready_i ( phy_tx_ready ) @@ -276,7 +298,7 @@ module hyperbus #( .LOG_DEPTH ( RxFifoLogDepth ) ) i_cdc_fifo_rx ( .src_rst_ni ( rst_phy ), - .src_clk_i ( clk_phy_i_0 ), + .src_clk_i ( clk_phy_0 ), .src_data_i ( phy_rx ), .src_valid_i ( phy_rx_valid ), .src_ready_o ( phy_rx_ready ), @@ -288,27 +310,5 @@ module hyperbus #( .dst_ready_i ( axi_rx_ready ) ); - // Shift clock by 90 degrees - generate - if(IsClockODelayed==0) begin : clock_generator - hyperbus_clk_gen ddr_clk ( - .clk_i ( clk_phy_i ), - .rst_ni ( rst_phy_ni ), - .clk0_o ( clk_phy_i_0 ), - .clk90_o ( clk_phy_i_90 ), - .clk180_o ( ), - .clk270_o ( ), - .rst_no ( rst_phy ) - ); - end else if (IsClockODelayed==1) begin - assign clk_phy_i_0 = clk_phy_i; - assign rst_phy = rst_phy_ni; - hyperbus_delay i_delay_tx_clk_90 ( - .in_i ( clk_phy_i_0 ), - .delay_i ( cfg.t_tx_clk_delay ), - .out_o ( clk_phy_i_90 ) - ); - end - endgenerate endmodule : hyperbus diff --git a/src/hyperbus_clk_gen.sv b/src/hyperbus_clk_gen.sv index 546ca86..26400c8 100644 --- a/src/hyperbus_clk_gen.sv +++ b/src/hyperbus_clk_gen.sv @@ -5,6 +5,9 @@ // Hayate Okuhara /// Generates 4 phase shifted clocks out of one faster clock +(* no_ungroup *) +(* 
no_boundary_optimization *) +(* keep_hierarchy = "yes" *) module hyperbus_clk_gen ( input logic clk_i, // input clock input logic rst_ni, diff --git a/src/hyperbus_clock_diff_out.sv b/src/hyperbus_clock_diff_out.sv index a73ddde..67c6ae6 100644 --- a/src/hyperbus_clock_diff_out.sv +++ b/src/hyperbus_clock_diff_out.sv @@ -6,6 +6,9 @@ // Stephan Keck /// A Hyperbus differential clock output generator. +(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) module hyperbus_clock_diff_out ( input logic in_i, diff --git a/src/hyperbus_delay.sv b/src/hyperbus_delay.sv index e7b8853..a441a3d 100644 --- a/src/hyperbus_delay.sv +++ b/src/hyperbus_delay.sv @@ -5,19 +5,22 @@ // Thomas Benz // Paul Scheffler +(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) module hyperbus_delay ( input logic in_i, - input logic [3:0] delay_i, + input logic [4:0] delay_i, output logic out_o ); + // The standard delay line is expected to have 32 taps with ~78ps per tap + // This conforms to the Xilinx IDELAYE2 with a 200MHz reference clock + // The total delay range is thus ~2.5ns configurable_delay #( - .NUM_STEPS(16) + .NUM_STEPS(32) ) i_delay ( .clk_i ( in_i ), - `ifndef TARGET_ASIC - .enable_i ( 1'b1 ), - `endif .delay_i ( delay_i ), .clk_o ( out_o ) ); diff --git a/src/hyperbus_phy.sv b/src/hyperbus_phy.sv index 43e4e57..1099f00 100644 --- a/src/hyperbus_phy.sv +++ b/src/hyperbus_phy.sv @@ -8,7 +8,6 @@ // Paul Scheffler module hyperbus_phy import hyperbus_pkg::*; #( - parameter int unsigned IsClockODelayed = -1, parameter int unsigned NumChips = 2, parameter int unsigned NumPhys = -1, parameter int unsigned TimerWidth = 16, @@ -113,7 +112,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( // ================= hyperbus_trx #( - .IsClockODelayed( IsClockODelayed ), .NumChips ( NumChips ), .RxFifoLogDepth ( RxFifoLogDepth ), .SyncStages ( SyncStages ) diff --git a/src/hyperbus_phy_if.sv b/src/hyperbus_phy_if.sv index 55b395f..c31676b 
100644 --- a/src/hyperbus_phy_if.sv +++ b/src/hyperbus_phy_if.sv @@ -3,9 +3,10 @@ // SPDX-License-Identifier: SHL-0.51 // // Luca Valente +// Philippe Sauter module hyperbus_phy_if import hyperbus_pkg::*; #( - parameter int unsigned IsClockODelayed = 1, + parameter bit UsePhyClkDivider = 1, parameter int unsigned NumChips = 2, parameter int unsigned NumPhys = 2, parameter int unsigned StartupCycles = 60000, /*MHz*/ // Conservative maximum frequency estimate @@ -13,9 +14,12 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( parameter type hyper_tx_t = logic, parameter type hyper_rx_t = logic )( - input logic clk_i, - input logic clk_i_90, - input logic rst_ni, + input logic clk_phy_i, + input logic clk_phy_i_90, // only used together with divided clock (clk_gen) +`ifdef TARGET_XILINX + input logic clk_ref200_i, // only used with Xilinx delay lines (reference to IDELAY cells) +`endif + input logic rst_phy_ni, input logic test_mode_i, // Config registers input hyper_cfg_t cfg_i, @@ -37,7 +41,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( input logic b_ready_i, output logic b_error_o, - // Physical interace: facing HyperBus + // Physical interface: facing HyperBus output logic [NumPhys-1:0][NumChips-1:0] hyper_cs_no, output logic [NumPhys-1:0] hyper_ck_o, output logic [NumPhys-1:0] hyper_ck_no, @@ -50,6 +54,34 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( output logic [NumPhys-1:0] hyper_reset_no ); + logic clk_phy_0, clk_phy_90; + + // Shift clock by 90 degrees + if(UsePhyClkDivider == '0) begin : clock_generator + assign clk_phy_0 = clk_phy_i; + + `ifdef TARGET_XILINX + hyperbus_clk_delay i_delay_tx_clk_90 ( + .rst_i ( ~rst_ni ), + .clk_ref200_i, + .clk_i ( clk_phy_i ), + .in_i ( clk_phy_0 ), + .delay_i ( cfg_i.t_tx_clk_delay ), + .out_o ( clk_phy_90 ) + ); + `else + hyperbus_delay i_delay_tx_clk_90 ( + .in_i ( clk_phy_0 ), + .delay_i ( cfg_i.t_tx_clk_delay ), + .out_o ( clk_phy_90 ) + ); + `endif + end else begin + assign clk_phy_0 = clk_phy_i; + 
assign clk_phy_90 = clk_phy_i_90; + end + + phy_rx_t [NumPhys-1:0] phy_fifo_rx; phy_rx_t [NumPhys-1:0] fifo_axi_rx; logic [NumPhys-1:0] phy_fifo_valid; @@ -72,9 +104,6 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( logic [NumPhys-1:0] phy_b_error; logic phy_b_ready; - genvar i; - generate - if (NumPhys==2) begin : phy_wrap logic [NumPhys-1:0] phy_enable; @@ -116,7 +145,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( assign phy_trans_valid = change_phy_active ? '0 : phy_trans_ready & {NumPhys{trans_valid_i}} & phy_active_q; - for ( i=0; i module hyperbus_trx #( - parameter int unsigned IsClockODelayed = -1, parameter int unsigned NumChips = 2, parameter int unsigned RxFifoLogDepth = 3, parameter int unsigned SyncStages = 2 @@ -26,14 +25,14 @@ module hyperbus_trx #( input logic cs_ena_i, output logic rwds_sample_o, - input logic [3:0] tx_clk_delay_i, + input logic [4:0] tx_clk_delay_i, input logic tx_clk_ena_i, input logic [15:0] tx_data_i, input logic tx_data_oe_i, input logic [1:0] tx_rwds_i, input logic tx_rwds_oe_i, - input logic [3:0] rx_clk_delay_i, + input logic [4:0] rx_clk_delay_i, input logic rx_clk_set_i, input logic rx_clk_reset_i, output logic [15:0] rx_data_o, @@ -156,13 +155,25 @@ module hyperbus_trx #( end // Shift RWDS clock by 90 degrees - hyperbus_delay i_delay_rx_rwds_90 ( - .in_i ( hyper_rwds_i ), - .delay_i ( rx_clk_delay_i ), - .out_o ( rx_rwds_90 ) - ); +`ifdef TARGET_XILINX + hyperbus_rwds_delay i_delay_rx_rwds_90 ( + .rst_i ( ~rst_ni ), + .clk_i, + .in_i ( hyper_rwds_i ), + .delay_i ( rx_clk_delay_i ), + .out_o ( rx_rwds_90 ) + ); + `else + hyperbus_delay i_delay_rx_rwds_90 ( + .in_i ( hyper_rwds_i ), + .delay_i ( rx_clk_delay_i ), + .out_o ( rx_rwds_90 ) + ); +`endif // Gate delayed RWDS clock with RX clock enable + (* no_boundary_optimization *) + (* keep_hierarchy = "yes" *) tc_clk_gating i_rwds_in_clk_gate ( .clk_i ( rx_rwds_90 ), .en_i ( rx_rwds_clk_ena ), diff --git a/target/xilinx/hyperbus_clk_delay.sv 
b/target/xilinx/hyperbus_clk_delay.sv new file mode 100644 index 0000000..c67957d --- /dev/null +++ b/target/xilinx/hyperbus_clk_delay.sv @@ -0,0 +1,83 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Philippe Sauter + +(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) +module hyperbus_clk_delay +( + input logic rst_i, // reset for IDELAYCTRL (RST) and the delay tap register (REGRST) + input logic clk_ref200_i, // 200 MHz reference clock + input logic clk_i, // control clock used to load delay_i + input logic in_i, // signal to be delayed (IDELAYE2 DATAIN) + input logic [4:0] delay_i, // delay tap value (IDELAYE2 CNTVALUEIN) + output logic out_o // delayed signal (IDELAYE2 DATAOUT) +); + + // "Xilinx 7 Series FPGA and Zynq-7000 All Programmable SoC Libraries Guide for HDL Designs" - page 192 + // Calibrates delay lines (IDELAYE2 and ODELAYE2) from ref clock (200 MHz) + IDELAYCTRL i_delayctrl + ( + .REFCLK ( clk_ref200_i ), + .RST ( rst_i ), + .RDY () + ); + + // "Xilinx 7 Series FPGA and Zynq-7000 All Programmable SoC Libraries Guide for HDL Designs" - page 330 + // Delay a signal going out to IOs or delay an internal clock (ODELAYE2, kept below as commented-out reference; IDELAYE2 is instantiated instead) + // modes: + // - FIXED: constant delay value from param + // - VARIABLE: start with param value then increment/decrement + // - VAR_LOAD: dynamically load tap values + // - VAR_LOAD_PIPE: pipelines dynamic load + // ODELAYE2 #( + // .CINVCTRL_SEL ( "FALSE" ), // "TRUE" activates CINVCTRL functionality + // .DELAY_SRC ( "CLKIN" ), // source to delay chain ("CLKIN" or "ODATAIN") + // .HIGH_PERFORMANCE_MODE ( "TRUE" ), // "TRUE" for less jitter; "FALSE" for low power + // .ODELAY_TYPE ( "VAR_LOAD" ), // mode of operation, see above + // .ODELAY_VALUE ( 0 ), // delay value 0-31 (used in "VARIABLE" and "FIXED" mode) + // .PIPE_SEL ( "FALSE" ), // "TRUE" activates pipelined operation + // .REFCLK_FREQUENCY ( 200.0 ), // used for STA and simulation (190.0 - 310.0 MHz) + // .SIGNAL_PATTERN ( "CLOCK" ) // "DATA" or "CLOCK" depending on function, used in STA + // ) i_delay ( + //
.REGRST ( rst_i ), // input: reset delay tap value to ODELAY_VALUE or CNTVALUEIN + // .C ( clk_i ), // input: control input clock + // .CLKIN ( in_i ), // input: clock to be delayed + // .ODATAIN ( 1'b0 ), // input: signal to be delayed driven by OSERDESE2 or output reg + // .DATAOUT ( out_o ), // output: delayed from ODATAIN (drives IO) or CLKIN (back into clock network) + // .CE ( 1'b0 ), // input: increment/decrement enable + // .CINVCTRL ( 1'b0 ), // input: switch clock polarity during operation (glitches!) + // .CNTVALUEIN ( delay_i ), // 5 bit input: delay tap + // .CNTVALUEOUT ( ), // 5 bit output: delay tap + // .INC ( 1'b0 ), // input: increment/decrement delay tap + // .LD ( 1'b0 ), // input: load ODELAY_VALUE param or CNTVALUEIN (depends on ODELAY_TYPE) + // .LDPIPEEN ( 1'b0 ) // input: enable the pipeline register to load data from LD + // ); + IDELAYE2 #( + .CINVCTRL_SEL ( "FALSE" ), // "TRUE" activates CINVCTRL functionality + .DELAY_SRC ( "DATAIN" ), // source to delay chain ("DATAIN" or "IDATAIN") + .HIGH_PERFORMANCE_MODE ( "TRUE" ), // "TRUE" for less jitter; "FALSE" for low power + .IDELAY_TYPE ( "VAR_LOAD" ), // mode of operation, see above + .IDELAY_VALUE ( 0 ), // delay value 0-31 (used in "VARIABLE" and "FIXED" mode) + .PIPE_SEL ( "FALSE" ), // "TRUE" activates pipelined operation + .REFCLK_FREQUENCY ( 200.0 ), // used for STA and simulation (190.0 - 310.0 MHz) + .SIGNAL_PATTERN ( "CLOCK" ) // "DATA" or "CLOCK" depending on function, used in STA + ) i_delay ( + .REGRST ( rst_i ), // input: reset delay tap value to IDELAY_VALUE or CNTVALUEIN + .C ( clk_i ), // input: control input clock + .DATAIN ( in_i ), // input: signal from FPGA logic to be delayed + .IDATAIN ( 1'b0 ), // input: signal from IO to be delayed + .DATAOUT ( out_o ), // output: delayed from DATAIN or IDATAIN (drives ISERDESE2 or logic, not IO!) + .CE ( 1'b0 ), // input: increment/decrement enable + .CINVCTRL ( 1'b0 ), // input: switch clock polarity during operation (glitches!)
+ .CNTVALUEIN ( delay_i ), // 5 bit input: delay tap + .CNTVALUEOUT ( ), // 5 bit output: delay tap + .LD ( 1'b1 ), // input: load IDELAY_VALUE param or CNTVALUEIN (depends on IDELAY_TYPE) + .INC ( 1'b0 ), // input: increment/decrement delay tap + .LDPIPEEN ( 1'b0 ) // input: enable the pipeline register to load data from LD + ); + +endmodule diff --git a/target/xilinx/hyperbus_rwds_delay.sv b/target/xilinx/hyperbus_rwds_delay.sv new file mode 100644 index 0000000..9bff254 --- /dev/null +++ b/target/xilinx/hyperbus_rwds_delay.sv @@ -0,0 +1,50 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Philippe Sauter + +(* no_ungroup *) +(* no_boundary_optimization *) +(* keep_hierarchy = "yes" *) +module hyperbus_rwds_delay +( + input logic rst_i, // reset for the delay tap register (REGRST) + input logic in_i, // signal to be delayed (IDELAYE2 DATAIN) + input logic clk_i, // control clock used to load delay_i + input logic [4:0] delay_i, // delay tap value (IDELAYE2 CNTVALUEIN) + output logic out_o // delayed signal (IDELAYE2 DATAOUT) +); + + // "Xilinx 7 Series FPGA and Zynq-7000 All Programmable SoC Libraries Guide for HDL Designs" - page 194 + // Delay a signal coming in from IOs or delay an internal data signal + // modes: + // - FIXED: constant delay value from param + // - VARIABLE: start with param value then increment/decrement + // - VAR_LOAD: dynamically load tap values + // - VAR_LOAD_PIPE: pipelines dynamic load + IDELAYE2 #( + .CINVCTRL_SEL ( "FALSE" ), // "TRUE" activates CINVCTRL functionality + .DELAY_SRC ( "DATAIN" ), // source to delay chain ("DATAIN" or "IDATAIN") + .HIGH_PERFORMANCE_MODE ( "TRUE" ), // "TRUE" for less jitter; "FALSE" for low power + .IDELAY_TYPE ( "VAR_LOAD" ), // mode of operation, see above + .IDELAY_VALUE ( 0 ), // delay value 0-31 (used in "VARIABLE" and "FIXED" mode) + .PIPE_SEL ( "FALSE" ), // "TRUE" activates pipelined operation + .REFCLK_FREQUENCY ( 200.0 ), // used for STA and simulation (190.0 - 310.0 MHz) + .SIGNAL_PATTERN ( "CLOCK" ) // "DATA" or "CLOCK" depending
on function, used in STA + ) i_delay ( + .REGRST ( rst_i ), // input: reset delay tap value to IDELAY_VALUE or CNTVALUEIN + .C ( clk_i ), // input: control input clock + .DATAIN ( in_i ), // input: signal from FPGA logic to be delayed + .IDATAIN ( 1'b0 ), // input: signal from IO to be delayed + .DATAOUT ( out_o ), // output: delayed from DATAIN or IDATAIN (drives ISERDESE2 or logic, not IO!) + .CE ( 1'b0 ), // input: increment/decrement enable + .CINVCTRL ( 1'b0 ), // input: switch clock polarity during operation (glitches!) + .CNTVALUEIN ( delay_i ), // 5 bit input: delay tap + .CNTVALUEOUT ( ), // 5 bit output: delay tap + .LD ( 1'b1 ), // input: load IDELAY_VALUE param or CNTVALUEIN (depends on IDELAY_TYPE) + .INC ( 1'b0 ), // input: increment/decrement delay tap + .LDPIPEEN ( 1'b0 ) // input: enable the pipeline register to load data from LD + ); + +endmodule diff --git a/test/dut_if.sv b/test/dut_if.sv index af257eb..7a06e5e 100644 --- a/test/dut_if.sv +++ b/test/dut_if.sv @@ -124,25 +124,25 @@ module dut_if // DUT hyperbus #( - .NumChips ( NumChips ), - .NumPhys ( NumPhys ), - .AxiAddrWidth ( AxiAddrWidth ), - .AxiDataWidth ( AxiDataWidth ), - .AxiIdWidth ( AxiIdWidth ), - .AxiUserWidth ( AxiUserWidth ), - .axi_req_t ( axi_req_t ), - .axi_rsp_t ( axi_resp_t ), - .axi_aw_chan_t ( axi_aw_chan_t ), - .axi_w_chan_t ( axi_w_chan_t ), - .axi_b_chan_t ( axi_b_chan_t ), - .axi_ar_chan_t ( axi_ar_chan_t ), - .axi_r_chan_t ( axi_r_chan_t ), - .RegAddrWidth ( RegAw ), - .RegDataWidth ( RegDw ), - .reg_req_t ( reg_req_t ), - .reg_rsp_t ( reg_rsp_t ), - .IsClockODelayed( 0 ), - .axi_rule_t ( axi_rule_t ) + .NumChips ( NumChips ), + .NumPhys ( NumPhys ), + .AxiAddrWidth ( AxiAddrWidth ), + .AxiDataWidth ( AxiDataWidth ), + .AxiIdWidth ( AxiIdWidth ), + .AxiUserWidth ( AxiUserWidth ), + .axi_req_t ( axi_req_t ), + .axi_rsp_t ( axi_resp_t ), + .axi_aw_chan_t ( axi_aw_chan_t ), + .axi_w_chan_t ( axi_w_chan_t ), + .axi_b_chan_t ( axi_b_chan_t ), + .axi_ar_chan_t (
axi_ar_chan_t ), + .axi_r_chan_t ( axi_r_chan_t ), + .RegAddrWidth ( RegAw ), + .RegDataWidth ( RegDw ), + .reg_req_t ( reg_req_t ), + .reg_rsp_t ( reg_rsp_t ), + .UsePhyClkDivider ( 1 ), + .axi_rule_t ( axi_rule_t ) ) i_dut ( .clk_phy_i ( clk_i ), .rst_phy_ni ( rst_ni ), From 4e957c493290b50dd413cbe3f0d5adb4a9097f58 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Wed, 29 Oct 2025 11:30:21 +0100 Subject: [PATCH 16/16] Fix phy_if clock and reset assignment --- src/hyperbus_phy_if.sv | 178 ++++++++++++++++++++--------------------- 1 file changed, 89 insertions(+), 89 deletions(-) diff --git a/src/hyperbus_phy_if.sv b/src/hyperbus_phy_if.sv index c31676b..e1d888e 100644 --- a/src/hyperbus_phy_if.sv +++ b/src/hyperbus_phy_if.sv @@ -89,7 +89,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( logic [NumPhys-1:0] fifo_axi_valid; logic fifo_axi_ready; - logic [NumPhys-1:0][1:0] fifo_axi_usage; + logic [NumPhys-1:0][1:0] fifo_axi_usage; logic tx_both_ready, ts_both_ready; logic rx_both_valid, b_both_valid; @@ -97,90 +97,90 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( logic [NumPhys-1:0] phy_tx_ready; logic phy_tx_valid; - logic [NumPhys-1:0] phy_trans_ready; - logic [NumPhys-1:0] phy_trans_valid; + logic [NumPhys-1:0] phy_trans_ready; + logic [NumPhys-1:0] phy_trans_valid; - logic [NumPhys-1:0] phy_b_valid; - logic [NumPhys-1:0] phy_b_error; - logic phy_b_ready; + logic [NumPhys-1:0] phy_b_valid; + logic [NumPhys-1:0] phy_b_error; + logic phy_b_ready; if (NumPhys==2) begin : phy_wrap - logic [NumPhys-1:0] phy_enable; - logic [NumPhys-1:0] phy_busy; - logic [NumPhys-1:0] phy_active_q, phy_active_d; - logic change_phy_active; - - assign change_phy_active = phy_active_q != phy_enable; - assign phy_enable = cfg_i.phys_in_use ? '1 : (1 << cfg_i.which_phy); - assign phy_active_d = change_phy_active && fifo_axi_usage == '0 ? 
- phy_enable | phy_busy : phy_active_q; - - always_ff @(posedge clk_i or negedge rst_ni ) begin - if (!rst_ni) begin - phy_active_q <= '1; - end else begin - phy_active_q <= phy_active_d; - end + logic [NumPhys-1:0] phy_enable; + logic [NumPhys-1:0] phy_busy; + logic [NumPhys-1:0] phy_active_q, phy_active_d; + logic change_phy_active; + + assign change_phy_active = phy_active_q != phy_enable; + assign phy_enable = cfg_i.phys_in_use ? '1 : (1 << cfg_i.which_phy); + assign phy_active_d = change_phy_active && fifo_axi_usage == '0 ? + phy_enable | phy_busy : phy_active_q; + + always_ff @(posedge clk_phy_0 or negedge rst_phy_ni ) begin + if (!rst_phy_ni) begin + phy_active_q <= '1; + end else begin + phy_active_q <= phy_active_d; end + end - assign rx_both_valid = & (fifo_axi_valid | ~phy_active_q); - assign rx_valid_o = rx_both_valid; - assign fifo_axi_ready = rx_ready_i && rx_both_valid; + assign rx_both_valid = & (fifo_axi_valid | ~phy_active_q); + assign rx_valid_o = rx_both_valid; + assign fifo_axi_ready = rx_ready_i && rx_both_valid; - assign rx_o.error = | ({fifo_axi_rx[1].error, fifo_axi_rx[0].error} & phy_active_q); - assign rx_o.last = & ({fifo_axi_rx[1].last, fifo_axi_rx[0].last} | ~phy_active_q); - assign tx_both_ready = & (phy_tx_ready | ~phy_active_q); - assign tx_ready_o = tx_both_ready; - assign phy_tx_valid = tx_both_ready && tx_valid_i; + assign rx_o.error = | ({fifo_axi_rx[1].error, fifo_axi_rx[0].error} & phy_active_q); + assign rx_o.last = & ({fifo_axi_rx[1].last, fifo_axi_rx[0].last} | ~phy_active_q); + assign tx_both_ready = & (phy_tx_ready | ~phy_active_q); + assign tx_ready_o = tx_both_ready; + assign phy_tx_valid = tx_both_ready && tx_valid_i; - assign b_both_valid = & (phy_b_valid | ~phy_active_q); - assign b_valid_o = b_both_valid; - assign phy_b_ready = b_ready_i && b_both_valid; - assign b_error_o = | (phy_b_error & phy_active_q); + assign b_both_valid = & (phy_b_valid | ~phy_active_q); + assign b_valid_o = b_both_valid; + assign 
phy_b_ready = b_ready_i && b_both_valid; + assign b_error_o = | (phy_b_error & phy_active_q); - assign ts_both_ready = change_phy_active ? '0 : - & (phy_trans_ready | ~phy_active_q); - assign trans_ready_o = ts_both_ready; - assign phy_trans_valid = change_phy_active ? '0 : - phy_trans_ready & {NumPhys{trans_valid_i}} & phy_active_q; + assign ts_both_ready = change_phy_active ? '0 : + & (phy_trans_ready | ~phy_active_q); + assign trans_ready_o = ts_both_ready; + assign phy_trans_valid = change_phy_active ? '0 : + phy_trans_ready & {NumPhys{trans_valid_i}} & phy_active_q; for ( genvar i=0; i