From 8b2f51a790f721a91ee1d03cee567c584b32acf1 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Fri, 29 Nov 2024 22:57:21 +0100 Subject: [PATCH 01/14] Set Hyperram chips conf register in tb --- Bender.yml | 1 + test/axi_hyper_tb.sv | 183 +++++++++++++++++++++++++++++++++------- test/dut_if.sv | 4 +- test/hyperbus_tb_pkg.sv | 32 +++++++ 4 files changed, 189 insertions(+), 31 deletions(-) create mode 100644 test/hyperbus_tb_pkg.sv diff --git a/Bender.yml b/Bender.yml index 905e54d..e5a6717 100644 --- a/Bender.yml +++ b/Bender.yml @@ -47,5 +47,6 @@ sources: - test/fixture_hyperbus.sv - test/hyperbus_tb.sv - test/dut_if.sv + - test/hyperbus_tb_pkg.sv - test/axi_hyper_tb.sv - src/hyperbus.sv diff --git a/test/axi_hyper_tb.sv b/test/axi_hyper_tb.sv index d3d4bb3..aef556f 100644 --- a/test/axi_hyper_tb.sv +++ b/test/axi_hyper_tb.sv @@ -28,12 +28,14 @@ module axi_hyper_tb /// Test time of the DUT parameter time TbTestTime = 4ns ); + import hyperbus_tb_pkg::*; ///////////////////////////// // Axi channel definitions // ///////////////////////////// `include "axi/typedef.svh" `include "axi/assign.svh" + ///////////////////////// // Clock and Reset gen // ///////////////////////// @@ -59,9 +61,11 @@ module axi_hyper_tb logic end_of_sim; - //////////////////////////////// - // Stimuli generator typedefs // - //////////////////////////////// + + /////////////////////// + // AXI Random Master // + /////////////////////// + // AXI master for random data transactions typedef axi_test::axi_rand_master #( .AW ( TbAxiAddrWidthFull ), .DW ( TbAxiDataWidthFull ), @@ -92,20 +96,12 @@ module axi_hyper_tb .TT( TbTestTime ) ) axi_scoreboard_mst_t; - typedef reg_test::reg_driver #( - .AW ( RegBusAW ), - .DW ( RegBusDW ), - .TT ( TbTestTime ) - ) reg_bus_master_t; - - logic s_reg_error; - AXI_BUS_DV #( .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), .AXI_ID_WIDTH ( TbAxiIdWidthFull ), .AXI_USER_WIDTH ( TbAxiUserWidthFull ) - ) axi_mst_intf_dv ( + ) 
axi_rand_intf_dv ( .clk_i ( clk ) ); @@ -118,12 +114,125 @@ module axi_hyper_tb .clk_i ( clk ) ); - `AXI_ASSIGN_MONITOR(score_mst_intf_dv, axi_mst_intf_dv) + AXI_BUS #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_rand_intf (); + + `AXI_ASSIGN_MONITOR(score_mst_intf_dv, axi_rand_intf_dv) + `AXI_ASSIGN(axi_rand_intf, axi_rand_intf_dv) + + + + //////////////////////// + // AXI Control Master // + //////////////////////// + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_ctrl_intf_dv ( + .clk_i ( clk ) + ); + + AXI_BUS #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_ctrl_intf (); + + typedef axi_test::axi_driver #( + .AW ( TbAxiAddrWidthFull ), + .DW ( TbAxiDataWidthFull ), + .IW ( TbAxiIdWidthFull ), + .UW ( TbAxiUserWidthFull ), + .TA ( TbApplTime ), + .TT ( TbTestTime ) + ) axi_ctrl_master_t; + axi_ctrl_master_t axi_ctrl_mst = new( axi_ctrl_intf_dv ); + + `AXI_ASSIGN(axi_ctrl_intf, axi_ctrl_intf_dv) + + logic s_axi_error; + + + ////////////////////////////// + // AXI Control Master Tasks // + ////////////////////////////// + task automatic axi_write_32( + input axi_addr_t addr, + input bit [31:0] data + ); + axi_ctrl_master_t::ax_beat_t ax = new(); + axi_ctrl_master_t::w_beat_t w = new(); + axi_ctrl_master_t::b_beat_t b; + + @(posedge clk); + ax.ax_addr = addr; + ax.ax_id = 0; + ax.ax_len = 0; + ax.ax_size = 2; + ax.ax_burst = axi_pkg::BURST_INCR; + axi_ctrl_mst.send_aw(ax); + w.w_strb = 'h0F; + w.w_data = data; + w.w_last = 1; + axi_ctrl_mst.send_w(w); + axi_ctrl_mst.recv_b(b); + if (b.b_resp != axi_pkg::RESP_OKAY) + $error("[AXI-CTRL] - Write error response: %d!", b.b_resp); + 
endtask + + + /////////////////// + // Regbus Master // + /////////////////// + typedef reg_test::reg_driver #( + .AW ( RegBusAW ), + .DW ( RegBusDW ), + .TT ( TbTestTime ) + ) reg_bus_master_t; + + logic s_reg_error; REG_BUS #( .ADDR_WIDTH(RegBusAW), .DATA_WIDTH(RegBusDW) ) reg_bus_mst (.clk_i (clk)); + + + + //////////////////// + // AXI Master MUX // + //////////////////// + + AXI_BUS #( + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_ID_WIDTH ( TbAxiIdWidthFull +1 ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ) + ) axi_dut_intf (); + + axi_mux_intf #( + .SLV_AXI_ID_WIDTH ( TbAxiIdWidthFull ), + .MST_AXI_ID_WIDTH ( TbAxiIdWidthFull +1 ), + .AXI_ADDR_WIDTH ( TbAxiAddrWidthFull ), + .AXI_DATA_WIDTH ( TbAxiDataWidthFull ), + .AXI_USER_WIDTH ( TbAxiUserWidthFull ), + .NO_SLV_PORTS ( 2 ) + ) i_axi_mst_mux ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .test_i ( 1'b0 ), + .slv ( { axi_ctrl_intf, axi_rand_intf } ), + .mst ( axi_dut_intf ) + ); + //////////////////// // Address Ranges // @@ -136,18 +245,21 @@ module axi_hyper_tb initial begin : proc_sim_crtl - automatic axi_scoreboard_mst_t mst_scoreboard = new( score_mst_intf_dv ); - automatic axi_rand_master_t axi_master = new( axi_mst_intf_dv ); - automatic reg_bus_master_t reg_master = new( reg_bus_mst ); + automatic axi_scoreboard_mst_t mst_scoreboard = new( score_mst_intf_dv ); + automatic axi_rand_master_t axi_rand_mst = new( axi_rand_intf_dv ); + automatic reg_bus_master_t reg_master = new( reg_bus_mst ); + + automatic s27ks_cfg0_reg_t s27ks_cfg0 = hyperbus_tb_pkg::s27ks_cfg0_default; // Reset the AXI drivers and scoreboards end_of_sim = 1'b0; mst_scoreboard.reset(); - axi_master.reset(); + axi_rand_mst.reset(); + axi_ctrl_mst.reset_master(); reg_master.reset_master(); // Set some mem regions for rand axi master - axi_master.add_memory_region(32'h8000_0000, 32'h8000_0000 + ( TbDramDataWidth * TbDramLenWidth ), axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE); + 
axi_rand_mst.add_memory_region(32'h8000_0000, 32'h8000_0000 + ( TbDramDataWidth * TbDramLenWidth ), axi_pkg::NORMAL_NONCACHEABLE_BUFFERABLE); mst_scoreboard.enable_all_checks(); @@ -156,11 +268,24 @@ module axi_hyper_tb #600350ns; + // switch memory address space to register space + reg_master.send_write(32'h7<<2, 1'b1, '1, s_reg_error); + if (s_reg_error != 1'b0) $error("unexpected error"); + + // enable variable latency so we can test RWDS sampling + s27ks_cfg0.fixed_latency_enable = 1'b0; + $display("t3est"); + axi_write_32(32'h8000_0000 + S27KS_CFG0_REG_OFFSET, (s27ks_cfg0 | s27ks_cfg0 << 16)); + + // switch back to memory address space + reg_master.send_write(32'h7<<2, 1'b0, '1, s_reg_error); + if (s_reg_error != 1'b0) $error("unexpected error"); + $display("==========================="); $display("= Random AXI transactions ="); $display("==========================="); - axi_master.run(TbNumReads, TbNumWrites); + axi_rand_mst.run(TbNumReads, TbNumWrites); $display("==========================="); $display("= Test finished ="); @@ -180,13 +305,13 @@ module axi_hyper_tb reg_master.send_write(32'h24,1'b0,'1,s_reg_error); if (s_reg_error != 1'b0) $error("unexpected error"); - axi_master.reset(); + axi_rand_mst.reset(); $display("==========================="); $display("= Random AXI transactions ="); $display("==========================="); - axi_master.run(TbNumReads, TbNumWrites); + axi_rand_mst.run(TbNumReads, TbNumWrites); $display("==========================="); $display("= Test finished ="); @@ -201,13 +326,13 @@ module axi_hyper_tb reg_master.send_write(32'h24,1'b1,'1,s_reg_error); if (s_reg_error != 1'b0) $error("unexpected error"); - axi_master.reset(); + axi_rand_mst.reset(); $display("==========================="); $display("= Random AXI transactions ="); $display("==========================="); - axi_master.run(TbNumReads, TbNumWrites); + axi_rand_mst.run(TbNumReads, TbNumWrites); $display("==========================="); $display("= Test finished ="); @@ 
-226,7 +351,7 @@ module axi_hyper_tb .TbTestTime ( TbTestTime ), .AxiDataWidth ( TbAxiDataWidthFull ), .AxiAddrWidth ( TbAxiAddrWidthFull ), - .AxiIdWidth ( TbAxiIdWidthFull ), + .AxiIdWidth ( TbAxiIdWidthFull+1 ), .AxiUserWidth ( TbAxiUserWidthFull ), .RegAw ( RegBusAW ), @@ -238,11 +363,11 @@ module axi_hyper_tb .axi_rule_t ( rule_t ) ) i_dut_if ( // clk and rst signal - .clk_i ( clk ), - .rst_ni ( rst_n ), - .end_sim_i ( end_of_sim ), - .axi_slv_if ( axi_mst_intf_dv ), - .reg_slv_if ( reg_bus_mst ) + .clk_i ( clk ), + .rst_ni ( rst_n ), + .end_sim_i ( end_of_sim ), + .axi_slv_if ( axi_dut_intf ), + .reg_slv_if ( reg_bus_mst ) ); endmodule diff --git a/test/dut_if.sv b/test/dut_if.sv index da49ec6..af257eb 100644 --- a/test/dut_if.sv +++ b/test/dut_if.sv @@ -31,8 +31,8 @@ module dut_if input logic rst_ni, input logic end_sim_i, - AXI_BUS_DV.Slave axi_slv_if, - REG_BUS.in reg_slv_if + AXI_BUS.Slave axi_slv_if, + REG_BUS.in reg_slv_if ); localparam int unsigned DRAM_DB_WIDTH = 16; diff --git a/test/hyperbus_tb_pkg.sv b/test/hyperbus_tb_pkg.sv new file mode 100644 index 0000000..a1bc827 --- /dev/null +++ b/test/hyperbus_tb_pkg.sv @@ -0,0 +1,32 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 + +package hyperbus_tb_pkg; + + parameter int unsigned S27KS_ID0_REG_OFFSET = 32'h0000_0000; + parameter int unsigned S27KS_ID1_REG_OFFSET = 32'h0000_0002; + parameter int unsigned S27KS_CFG0_REG_OFFSET = 32'h0000_2000; + parameter int unsigned S27KS_CFG1_REG_OFFSET = 32'h0000_2002; + + typedef struct packed { + bit deep_power_done; + bit [2:0] drive_strength; + bit [3:0] reserved; + bit [3:0] initial_latency; + bit fixed_latency_enable; + bit hybrid_burst_enable; + bit [1:0] burst_length; + } s27ks_cfg0_reg_t; + + parameter s27ks_cfg0_reg_t s27ks_cfg0_default = s27ks_cfg0_reg_t'{ + deep_power_done: 1'h1, + drive_strength: 3'h0, + reserved: 4'hF, + initial_latency: 4'h1, + fixed_latency_enable: 1'b1, + hybrid_burst_enable: 1'b1, + burst_length: 2'h3 + }; + +endpackage From 3bcf35c8a8b802cd75bb2affc22a857bc10057cf Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 15:55:13 +0100 Subject: [PATCH 02/14] Remove unused parameters --- src/hyperbus.sv | 1 - src/hyperbus_phy_if.sv | 106 ++++++++++++++++++++--------------------- 2 files changed, 52 insertions(+), 55 deletions(-) diff --git a/src/hyperbus.sv b/src/hyperbus.sv index cf7212d..ed7bdcf 100644 --- a/src/hyperbus.sv +++ b/src/hyperbus.sv @@ -34,7 +34,6 @@ module hyperbus #( parameter logic [RegDataWidth-1:0] RstChipSpace = 'h1_0000, // 64 KiB: Current maximum HyperBus device size parameter hyperbus_pkg::hyper_cfg_t RstCfg = hyperbus_pkg::gen_RstCfg(NumPhys,MinFreqMHz), parameter int unsigned PhyStartupCycles = 300 * 200, /* us*MHz */ // Conservative maximum frequency estimate - parameter int unsigned AxiLogDepth = 3, parameter int unsigned SyncStages = 2 ) ( input logic clk_phy_i, diff --git a/src/hyperbus_phy_if.sv b/src/hyperbus_phy_if.sv index 3203b63..28bff22 100644 --- a/src/hyperbus_phy_if.sv +++ b/src/hyperbus_phy_if.sv @@ -8,10 +8,8 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( parameter int unsigned IsClockODelayed = 1, parameter int unsigned 
NumChips = 2, parameter int unsigned NumPhys = 2, - parameter int unsigned TimerWidth = 16, - parameter int unsigned RxFifoLogDepth = 3, parameter int unsigned StartupCycles = 60000, /*MHz*/ // Conservative maximum frequency estimate - parameter int unsigned SyncStages = 2, + parameter int unsigned SyncStages = 2, parameter type hyper_tx_t = logic, parameter type hyper_rx_t = logic )( @@ -61,11 +59,11 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( logic [NumPhys-1:0][1:0] fifo_axi_usage; - logic tx_both_ready, ts_both_ready; - logic rx_both_valid, b_both_valid; + logic tx_both_ready, ts_both_ready; + logic rx_both_valid, b_both_valid; - logic [NumPhys-1:0] phy_tx_ready; - logic phy_tx_valid; + logic [NumPhys-1:0] phy_tx_ready; + logic phy_tx_valid; logic [NumPhys-1:0] phy_trans_ready; logic [NumPhys-1:0] phy_trans_valid; @@ -77,7 +75,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( genvar i; generate - if (NumPhys==2) begin : phy_wrap + if (NumPhys==2) begin : phy_wrap logic [NumPhys-1:0] phy_enable; logic [NumPhys-1:0] phy_busy; @@ -156,41 +154,41 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( .busy_o ( phy_busy[i] ), - .rx_data_o ( phy_fifo_rx[i].data ), - .rx_last_o ( phy_fifo_rx[i].last ), - .rx_error_o ( phy_fifo_rx[i].error ), - .rx_valid_o ( phy_fifo_valid[i] ), - .rx_ready_i ( phy_fifo_ready[i] ), + .rx_data_o ( phy_fifo_rx[i].data ), + .rx_last_o ( phy_fifo_rx[i].last ), + .rx_error_o ( phy_fifo_rx[i].error ), + .rx_valid_o ( phy_fifo_valid[i] ), + .rx_ready_i ( phy_fifo_ready[i] ), - .tx_data_i ( tx_i.data[16*i +:16] ), - .tx_strb_i ( tx_i.strb[2*i +:2] ), - .tx_last_i ( tx_i.last ), - .tx_valid_i ( phy_tx_valid ), - .tx_ready_o ( phy_tx_ready[i] ), + .tx_data_i ( tx_i.data[16*i +:16] ), + .tx_strb_i ( tx_i.strb[2*i +:2] ), + .tx_last_i ( tx_i.last ), + .tx_valid_i ( phy_tx_valid ), + .tx_ready_o ( phy_tx_ready[i] ), - .b_error_o ( phy_b_error[i] ), - .b_valid_o ( phy_b_valid[i] ), - .b_ready_i ( phy_b_ready ), + .b_error_o ( 
phy_b_error[i] ), + .b_valid_o ( phy_b_valid[i] ), + .b_ready_i ( phy_b_ready ), .trans_i ( trans_i ), .trans_cs_i ( trans_cs_i ), .trans_valid_i ( phy_trans_valid[i] ), .trans_ready_o ( phy_trans_ready[i] ), - .hyper_cs_no ( hyper_cs_no[i] ), - .hyper_ck_o ( hyper_ck_o[i] ), - .hyper_ck_no ( hyper_ck_no[i] ), - .hyper_rwds_o ( hyper_rwds_o[i] ), - .hyper_rwds_i ( hyper_rwds_i[i] ), - .hyper_rwds_oe_o( hyper_rwds_oe_o[i] ), - .hyper_dq_i ( hyper_dq_i[i] ), - .hyper_dq_o ( hyper_dq_o[i] ), - .hyper_dq_oe_o ( hyper_dq_oe_o[i] ), - .hyper_reset_no ( hyper_reset_no[i] ) - ); + .hyper_cs_no ( hyper_cs_no[i] ), + .hyper_ck_o ( hyper_ck_o[i] ), + .hyper_ck_no ( hyper_ck_no[i] ), + .hyper_rwds_o ( hyper_rwds_o[i] ), + .hyper_rwds_i ( hyper_rwds_i[i] ), + .hyper_rwds_oe_o( hyper_rwds_oe_o[i] ), + .hyper_dq_i ( hyper_dq_i[i] ), + .hyper_dq_o ( hyper_dq_o[i] ), + .hyper_dq_oe_o ( hyper_dq_oe_o[i] ), + .hyper_reset_no ( hyper_reset_no[i] ) + ); - end // for ( i=0; i Date: Tue, 28 Oct 2025 15:57:13 +0100 Subject: [PATCH 03/14] Fix fifo depth for clk_sys=clk_phy Prevents performance degradations (FIFO bubbles) in situation where the system and PHY are running on the same clock. --- src/hyperbus.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperbus.sv b/src/hyperbus.sv index ed7bdcf..7b4a005 100644 --- a/src/hyperbus.sv +++ b/src/hyperbus.sv @@ -28,8 +28,8 @@ module hyperbus #( parameter type reg_rsp_t = logic, parameter type axi_rule_t = logic, // The below have sensible defaults, but should be set on integration! 
- parameter int unsigned RxFifoLogDepth = 2, - parameter int unsigned TxFifoLogDepth = 2, + parameter int unsigned RxFifoLogDepth = 3, + parameter int unsigned TxFifoLogDepth = 3, parameter logic [RegDataWidth-1:0] RstChipBase = 'h0, // Base address for all chips parameter logic [RegDataWidth-1:0] RstChipSpace = 'h1_0000, // 64 KiB: Current maximum HyperBus device size parameter hyperbus_pkg::hyper_cfg_t RstCfg = hyperbus_pkg::gen_RstCfg(NumPhys,MinFreqMHz), From 04462ee00c05770e1cc749467cb599888e656ae6 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 16:13:21 +0100 Subject: [PATCH 04/14] Add vsim outputs and model to gitignore --- .gitignore | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bdf1e1b..a63d836 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ .bender scripts/compile.tcl -models/s27ks0641 \ No newline at end of file +axi_log/ +work/ +transcript +modelsim.ini +vsim.wlf +models/s27ks0641 From eb5edddf63d3c274235bb9242b739a6b792dfe5d Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:08:49 +0100 Subject: [PATCH 05/14] Fix fifo depth for clk_sys=clk_phy Prevents performance degradations (FIFO bubbles) in situation where the system and PHY are running on the same clock. 
--- src/hyperbus_axi.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperbus_axi.sv b/src/hyperbus_axi.sv index ca78ed2..a54012b 100644 --- a/src/hyperbus_axi.sv +++ b/src/hyperbus_axi.sv @@ -170,7 +170,7 @@ module hyperbus_axi #( // ============================ axi_fifo #( - .Depth ( 4 ), + .Depth ( 8 ), .FallThrough ( 1'b0 ), .aw_chan_t ( axi_fifo_aw_chan_t ), .w_chan_t ( axi_fifo_w_chan_t ), @@ -420,7 +420,7 @@ module hyperbus_axi #( stream_fifo #( .FALL_THROUGH ( 1'b0 ), .T ( axi_w_chan_t ), - .DEPTH ( 8 ) + .DEPTH ( 16 ) ) wchan_stream_fifo ( .clk_i, .rst_ni, From 3ee09acd93ddaee2ea1f98c687572d5ef960bea3 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:08:49 +0100 Subject: [PATCH 06/14] Remove unused parameters --- src/hyperbus_phy_if.sv | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/hyperbus_phy_if.sv b/src/hyperbus_phy_if.sv index 28bff22..55b395f 100644 --- a/src/hyperbus_phy_if.sv +++ b/src/hyperbus_phy_if.sv @@ -50,12 +50,12 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( output logic [NumPhys-1:0] hyper_reset_no ); - phy_rx_t [NumPhys-1:0] phy_fifo_rx; - phy_rx_t [NumPhys-1:0] fifo_axi_rx; - logic [NumPhys-1:0] phy_fifo_valid; - logic [NumPhys-1:0] phy_fifo_ready; - logic [NumPhys-1:0] fifo_axi_valid; - logic fifo_axi_ready; + phy_rx_t [NumPhys-1:0] phy_fifo_rx; + phy_rx_t [NumPhys-1:0] fifo_axi_rx; + logic [NumPhys-1:0] phy_fifo_valid; + logic [NumPhys-1:0] phy_fifo_ready; + logic [NumPhys-1:0] fifo_axi_valid; + logic fifo_axi_ready; logic [NumPhys-1:0][1:0] fifo_axi_usage; @@ -150,7 +150,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), - .cfg_i ( cfg_i ), + .cfg_i ( cfg_i ), .busy_o ( phy_busy[i] ), @@ -202,7 +202,7 @@ module hyperbus_phy_if import hyperbus_pkg::*; #( .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), - .cfg_i ( cfg_i ), + .cfg_i ( cfg_i ), .busy_o ( ), From 
d6ce1df4511f5a4991e9b7a8f644365049228eeb Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:08:49 +0100 Subject: [PATCH 07/14] Add configurable CS to clock-start delay According to spec: t_DSV (data strobe valid) which is the time from CS# going low to the first hyperbus clock can be at most 2 clock periods long (12ns@166MHz). This shrinks the RWDS valid window down to one period centered on CA4 (5th data transaction). Meaning it is valid around the 3rd rising edge of CK. Problem: With additional routing delay this may cause the RWDS sample register (clocked by clk_i) to miss the stable period of RWDS. Solution: Delaying the clock is allowed and gives RWDS more time to arrive and creates a larger stable window. It is possible to set this to zero to increase throughput. --- src/hyperbus_cfg_regs.sv | 6 ++++-- src/hyperbus_phy.sv | 29 +++++++++++++++++++++++------ src/hyperbus_pkg.sv | 7 +++++-- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/hyperbus_cfg_regs.sv b/src/hyperbus_cfg_regs.sv index d438007..70ac78e 100644 --- a/src/hyperbus_cfg_regs.sv +++ b/src/hyperbus_cfg_regs.sv @@ -30,10 +30,10 @@ module hyperbus_cfg_regs #( `include "common_cells/registers.svh" // Internal Parameters - localparam int unsigned NumBaseRegs = 11; + localparam int unsigned NumBaseRegs = 12; localparam int unsigned NumRegs = 2*NumChips + NumBaseRegs; localparam int unsigned RegsBits = cf_math_pkg::idx_width(NumRegs); - localparam int unsigned RegStrbWidth = RegDataWidth/8; // TODO ASSERT: Must be power of two >= 16!! + localparam int unsigned RegStrbWidth = RegDataWidth/8; // Data and index types typedef logic [RegsBits-1:0] reg_idx_t; @@ -59,6 +59,7 @@ module hyperbus_cfg_regs #( if (sel_reg_mapped) begin rfield = { crange_q, + reg_data_t'(cfg_q.csn_to_ck_cycles), reg_data_t'(cfg_q.t_csh_cycles), reg_data_t'(cfg_q.which_phy), reg_data_t'(cfg_q.phys_in_use), @@ -99,6 +100,7 @@ module hyperbus_cfg_regs #( 'h8: cfg_d.phys_in_use = (NumPhys==1) ? 
0 : ( (~wmask & cfg_q.phys_in_use ) | (wmask & reg_req_i.wdata) ); 'h9: cfg_d.which_phy = (NumPhys==1) ? 0 : ( (~wmask & cfg_q.which_phy ) | (wmask & reg_req_i.wdata) ); 'ha: cfg_d.t_csh_cycles = (~wmask & cfg_q.t_csh_cycles ) | (wmask & reg_req_i.wdata); + 'hb: cfg_d.csn_to_ck_cycles = (~wmask & cfg_q.csn_to_ck_cycles ) | (wmask & reg_req_i.wdata); default: begin {sel_chip, chip_reg} = sel_reg - NumBaseRegs; crange_d[sel_chip][chip_reg] = (~wmask & crange_q[sel_chip][chip_reg]) | (wmask & reg_req_i.wdata); diff --git a/src/hyperbus_phy.sv b/src/hyperbus_phy.sv index 78ed27d..e9229fe 100644 --- a/src/hyperbus_phy.sv +++ b/src/hyperbus_phy.sv @@ -269,15 +269,32 @@ module hyperbus_phy import hyperbus_pkg::*; #( if (trans_valid_i & ~b_pending_q & r_outstand_q == '0) begin tf_d = trans_i; cs_d = trans_cs_i; - // Send 3 CA words (t_CSS respected through clock delay) - timer_d = 2; - state_d = SendCA; - // Enable output driver (needs to be enabled one cycle - // earlier since tri-state enables of IO pads are quite - // slow compared to the data pins) + + if(cfg_i.csn_to_ck_cycles != 0) begin + // assert CS but delay hyper_ck to allow more time + // for memory to drive RWDS (to satisfy t_DSV) + state_d = DelayCK; + timer_d = cfg_i.csn_to_ck_cycles -1; + end else begin + // max throughput when memory RWDS signal arrives early + state_d = SendCA; + // Send 3 CA words (t_CSS respected through clock delay) + timer_d = 2; + end + + // Enable output driver (needs to be enabled at least + // one cycle earlier since tri-state enables of IO pads + // are quite slow compared to the data pins) trx_tx_data_oe = 1'b1; end end + DelayCK: begin + trx_clk_ena = 1'b0; + if (ctl_timer_zero) begin + timer_d = 2; // Send 3 CA words + state_d = SendCA; + end + end SendCA: begin // Dataflow handled outside FSM trx_clk_ena = 1'b1; diff --git a/src/hyperbus_pkg.sv b/src/hyperbus_pkg.sv index 2ea4f47..31e0482 100644 --- a/src/hyperbus_pkg.sv +++ b/src/hyperbus_pkg.sv @@ -20,7 +20,8 @@ package 
hyperbus_pkg; logic address_space; logic phys_in_use; logic which_phy; - logic [3:0] t_csh_cycles; //add an configurable Tcsh for high freq operation(200MHz Hyperram) + logic [3:0] t_csh_cycles; // add an configurable Tcsh for high freq operation(200MHz Hyperram) + logic [3:0] csn_to_ck_cycles; // delay hyper_ck after CS is asserted (more time for t_DSV) } hyper_cfg_t; typedef struct packed { @@ -40,6 +41,7 @@ package hyperbus_pkg; typedef enum logic[3:0] { Startup, Idle, + DelayCK, SendCA, WaitLatAccess, Read, @@ -74,7 +76,8 @@ package hyperbus_pkg; address_space: 'b0, phys_in_use: NumPhys-1, which_phy: NumPhys-1, - t_csh_cycles: 'h1 + t_csh_cycles: 'h1, + csn_to_ck_cycles: 'h2 }; return cfg; From 239d0440a4a2c8948925b369f5d250b029debff4 Mon Sep 17 00:00:00 2001 From: phsauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 08/14] Enforce optimal worst-case RWDS sampling For the worst case RWDS timing (t_DSV max, t_CSS min and t_CKDS min) the window of validity for RWDS is around one clock period centered around the 3rd rising edge of CK. This ensures we sample exactly then. Other sampling may lead to improper results (from sampling high Z) and increases the risk of metastability. For long chip-to-chip delays (or slow pads) it may still be necessary to increase the CS falling edge to first CK edge time. 
--- src/hyperbus_trx.sv | 50 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index b442b10..53de430 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -65,6 +65,11 @@ module hyperbus_trx #( logic rx_rwds_fifo_valid; logic rx_rwds_fifo_ready; + // used to time the sampling of RWDS to determine additional latency + logic [2:0] ck_cnt_d, ck_cnt_q; // TODO: check in sim if this can be one less + logic rwds_sample_ena; + logic rwds_sample_clk; + // Feed through async reset assign hyper_reset_no = rst_ni; @@ -127,16 +132,49 @@ module hyperbus_trx #( end end - // Sample RWDS on demand for extra latency determination - always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rwds_sample - if (~rst_ni) rwds_sample_o <= '0; - else if (rwds_sample_ena_i) rwds_sample_o <= hyper_rwds_i; - end - // ======== // RX // ======== + // The following guarantees a proper worst-case sampling of RWDS. + // RWDS may only be valid (and stable) for a single period around + // the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). + // We create a clock gate that open just for this window from falling + // to falling edge of hyper_ck around the 3rd rising edge. + // Then and only then will the sample be taken. + + // Constraints: + // ena_i is clocked from clk_i, ck_cnt with ~clk_90_i and + // rwds_sample_o with clk_90_i. 
+ // The paths are very short and as long the clk to clk_90 + // constraints are proper (clk_90 being a derived shifted clock) + // this should not cause any problems + always_comb begin : gen_ck_counter + ck_cnt_d = clk_cnt_q; + // controlled by above FSM, only true in SendCA state + if(trx_rwds_sample_ena) + ck_cnt_d = ck_cnt_q +1; + // reset counter when the transaction ends (CS goes high) + if(hyper_cs_no) + ck_cnt_d = '0; + end + // clocked with falling edge, creates an active clk-gate around rising edge + `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_no); + + assign rwds_sample_ena = (ck_cnt_q == 2) & rwds_sample_ena_i; // TODO: Check proper sampling point in sim + + // Gate the sampling of rwds to the third rising CK_90 edge only + tc_clk_gating i_rwds_in_clk_gate ( + .clk_i ( hyper_ck_o ), + .en_i ( rwds_sample_ena ), + .test_en_i ( test_mode_i ), + .clk_o ( rwds_sample_clk ) + ); + // Sample RWDS on demand for extra latency determination + `FF(rwds_sample_o, hyper_rwds_i, '0, rwds_sample_clk); + //------------------------------------------------------------------------- + + // Set and Reset RX clock enable always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rx_delay if (~rst_ni) rx_rwds_clk_ena <= 1'b0; From a55f9763d2b5301b1a324c2c96bc67d74e742b54 Mon Sep 17 00:00:00 2001 From: phsauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 09/14] Remove phy-fsm signals from RWDS sampling Decouples the clock domains better: only the rwds_sample_o signal crosses between phy and system clk. --- src/hyperbus_trx.sv | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index 53de430..ebf803e 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -144,24 +144,23 @@ module hyperbus_trx #( // Then and only then will the sample be taken. // Constraints: - // ena_i is clocked from clk_i, ck_cnt with ~clk_90_i and - // rwds_sample_o with clk_90_i. 
- // The paths are very short and as long the clk to clk_90 - // constraints are proper (clk_90 being a derived shifted clock) - // this should not cause any problems + // As long as the clk to clk_90 constraints are proper + // (clk_90 being a derived shifted clock) this should not cause problems + always_comb begin : gen_ck_counter - ck_cnt_d = clk_cnt_q; - // controlled by above FSM, only true in SendCA state - if(trx_rwds_sample_ena) - ck_cnt_d = ck_cnt_q +1; + ck_cnt_d = ck_cnt_q +1; // count hyper_ck falling edges + // reset counter when the transaction ends (CS goes high) - if(hyper_cs_no) + if(hyper_cs_no) begin ck_cnt_d = '0; + end else if(ck_cnt_q == 3) begin // stop counting once sample is taken + ck_cnt_d = ck_cnt_q; + end end // clocked with falling edge, creates an active clk-gate around rising edge `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_no); - assign rwds_sample_ena = (ck_cnt_q == 2) & rwds_sample_ena_i; // TODO: Check proper sampling point in sim + assign rwds_sample_ena = (ck_cnt_q == 2); // TODO: Check proper sampling point in sim // Gate the sampling of rwds to the third rising CK_90 edge only tc_clk_gating i_rwds_in_clk_gate ( From abf586bcac51386ce22710c9367b54f65874cd2b Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 10/14] Refactor RWDS sampling to separate module --- Bender.yml | 1 + src/hyperbus_rwds_sampler.sv | 86 ++++++++++++++++++++++++++++++++++++ src/hyperbus_trx.sv | 49 +++++--------------- 3 files changed, 99 insertions(+), 37 deletions(-) create mode 100644 src/hyperbus_rwds_sampler.sv diff --git a/Bender.yml b/Bender.yml index e5a6717..1ea23f4 100644 --- a/Bender.yml +++ b/Bender.yml @@ -34,6 +34,7 @@ sources: - src/hyperbus_phy2r.sv - src/hyperbus_ddr_out.sv - src/hyperbus_delay.sv + - src/hyperbus_rwds_sampler.sv - src/hyperbus_trx.sv - src/hyperbus_cfg_regs.sv - src/hyperbus_phy.sv diff --git a/src/hyperbus_rwds_sampler.sv b/src/hyperbus_rwds_sampler.sv new file mode 100644 
index 0000000..1237b5f --- /dev/null +++ b/src/hyperbus_rwds_sampler.sv @@ -0,0 +1,86 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Philippe Sauter + +// This modules guarantees proper worst-case sampling of RWDS. +// RWDS may only be valid (and stable) for a single period around +// the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). +// Since there may be arbitrary pad and PCBs delays, the sampling edge +// is fully configurable (edge number and polarity). +// A gated clock that is only active around one edge is created +// and then the sample is taken only at the selected edge. +// The final sample is saved into a register in the phy clock domain. +// +// It is not possible to sample on the very first rising clock edge +// with this mechanism. +// Therefore cfg_edge_idx_i = 0 selects the first failling edge +// or the subsequent rising edge, depending on cfg_edge_pol_i. +// With this naming scheme, the default edge should be idx=1, pol=1. +// +// Constraints: +// cfg* signals are pseudostatic (set_false_path -setup or set_multicycle_path) + +`include "common_cells/registers.svh" + +module hyperbus_rwds_sampler import hyperbus_pkg::*; #() +( + // Global signals + input logic clk_i, + input logic rst_ni, + input logic test_mode_i, + + // Transciever control: facing controller + output logic rwds_sample_o, + + // Physical interace: facing HyperBus + input logic hyper_cs_ni, + input logic hyper_ck_i, + input logic hyper_ck_ni, + input logic hyper_rwds_i, + input logic hyper_rwds_oe_i +); + + // used to time the sampling of RWDS to determine additional latency + logic [2:0] ck_cnt_d, ck_cnt_q; // TODO: check in sim if this can be one less + logic rwds_sample_ena; + logic rwds_sample_clk; + + // The following guarantees a proper worst-case sampling of RWDS. 
+ // RWDS may only be valid (and stable) for a single period around + // the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). + // We create a clock gate that open just for this window from falling + // to falling edge of hyper_ck around the 3rd rising edge. + // Then and only then will the sample be taken. + + // Constraints: + // As long as the clk to clk_90 constraints are proper + // (clk_90 being a derived shifted clock) this should not cause problems + + always_comb begin : gen_ck_counter + ck_cnt_d = ck_cnt_q +1; // count hyper_ck falling edges + + // reset counter when the transaction ends (CS goes high) + if(hyper_cs_ni) begin + ck_cnt_d = '0; + end else if(ck_cnt_q == 3) begin // stop counting once sample is taken + ck_cnt_d = ck_cnt_q; + end + end + // clocked with falling edge, creates an active clk-gate around rising edge + `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_ni); + + assign rwds_sample_ena = (ck_cnt_q == 2); // TODO: Check proper sampling point in sim + + // Gate the sampling of rwds to the third rising CK_90 edge only + tc_clk_gating i_rwds_in_clk_gate ( + .clk_i ( hyper_ck_i ), + .en_i ( rwds_sample_ena ), + .test_en_i ( test_mode_i ), + .clk_o ( rwds_sample_clk ) + ); + // Sample RWDS on demand for extra latency determination + `FF(rwds_sample_o, hyper_rwds_i, '0, rwds_sample_clk); + +endmodule diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index ebf803e..fc6772b 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -136,43 +136,18 @@ module hyperbus_trx #( // RX // ======== - // The following guarantees a proper worst-case sampling of RWDS. - // RWDS may only be valid (and stable) for a single period around - // the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). - // We create a clock gate that open just for this window from falling - // to falling edge of hyper_ck around the 3rd rising edge. - // Then and only then will the sample be taken. 
- - // Constraints: - // As long as the clk to clk_90 constraints are proper - // (clk_90 being a derived shifted clock) this should not cause problems - - always_comb begin : gen_ck_counter - ck_cnt_d = ck_cnt_q +1; // count hyper_ck falling edges - - // reset counter when the transaction ends (CS goes high) - if(hyper_cs_no) begin - ck_cnt_d = '0; - end else if(ck_cnt_q == 3) begin // stop counting once sample is taken - ck_cnt_d = ck_cnt_q; - end - end - // clocked with falling edge, creates an active clk-gate around rising edge - `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_no); - - assign rwds_sample_ena = (ck_cnt_q == 2); // TODO: Check proper sampling point in sim - - // Gate the sampling of rwds to the third rising CK_90 edge only - tc_clk_gating i_rwds_in_clk_gate ( - .clk_i ( hyper_ck_o ), - .en_i ( rwds_sample_ena ), - .test_en_i ( test_mode_i ), - .clk_o ( rwds_sample_clk ) - ); - // Sample RWDS on demand for extra latency determination - `FF(rwds_sample_o, hyper_rwds_i, '0, rwds_sample_clk); - //------------------------------------------------------------------------- - + // guarantees proper worst-case sampling of RWDS + hyperbus_rwds_sampler i_rwds_sampler ( + .clk_i, + .rst_ni, + .test_mode_i, + .rwds_sample_o, + .hyper_cs_ni ( hyper_cs_no ), + .hyper_ck_i ( hyper_ck_o ), + .hyper_ck_ni ( hyper_ck_no ), + .hyper_rwds_i ( hyper_rwds_i ), + .hyper_rwds_oe_i ( ) + ) // Set and Reset RX clock enable always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rx_delay From 34d57040a1271e7d0614c06090ad68dda6ecee61 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 11/14] More configurable RWDS sampling Exact sampling edge is adjustable. 
--- src/hyperbus_rwds_sampler.sv | 102 ++++++++++++++++++++--------------- src/hyperbus_trx.sv | 23 ++++---- 2 files changed, 73 insertions(+), 52 deletions(-) diff --git a/src/hyperbus_rwds_sampler.sv b/src/hyperbus_rwds_sampler.sv index 1237b5f..b5f1a7d 100644 --- a/src/hyperbus_rwds_sampler.sv +++ b/src/hyperbus_rwds_sampler.sv @@ -27,60 +27,76 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() ( // Global signals - input logic clk_i, - input logic rst_ni, - input logic test_mode_i, - - // Transciever control: facing controller - output logic rwds_sample_o, - - // Physical interace: facing HyperBus - input logic hyper_cs_ni, - input logic hyper_ck_i, - input logic hyper_ck_ni, - input logic hyper_rwds_i, - input logic hyper_rwds_oe_i + input logic clk_i, // phy clock + input logic rst_ni, + input logic test_mode_i, + + input logic [1:0] cfg_edge_idx_i, // #edge where rwds is sampled + input logic cfg_edge_pol_i, // 1: rising, 0: falling + + // sampled value going to PHY-FSM + output logic rwds_sample_o, + + // Physical interface: facing HyperBus + input logic hyper_cs_ni, + input logic hyper_ck_i, + input logic hyper_ck_ni, + input logic hyper_rwds_i ); // used to time the sampling of RWDS to determine additional latency - logic [2:0] ck_cnt_d, ck_cnt_q; // TODO: check in sim if this can be one less - logic rwds_sample_ena; - logic rwds_sample_clk; - - // The following guarantees a proper worst-case sampling of RWDS. - // RWDS may only be valid (and stable) for a single period around - // the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz). - // We create a clock gate that open just for this window from falling - // to falling edge of hyper_ck around the 3rd rising edge. - // Then and only then will the sample be taken. 
- - // Constraints: - // As long as the clk to clk_90 constraints are proper - // (clk_90 being a derived shifted clock) this should not cause problems + logic [2:0] cnt_edge_d, cnt_edge_q; // one bit larger than config + logic [2:0] cnt_target_value; + logic cnt_clk; // clock used for edge counting + logic sampling_clk, sampling_clk_gated; // clock used for sampling + logic enable_sampling; + logic rwds_sample; + + assign cnt_target_value = cfg_edge_idx_i + 1; - always_comb begin : gen_ck_counter - ck_cnt_d = ck_cnt_q +1; // count hyper_ck falling edges + always_comb begin : gen_edge_cnt + cnt_edge_d = cnt_edge_q +1; // count hyper_ck(_n) edges // reset counter when the transaction ends (CS goes high) if(hyper_cs_ni) begin - ck_cnt_d = '0; - end else if(ck_cnt_q == 3) begin // stop counting once sample is taken - ck_cnt_d = ck_cnt_q; + cnt_edge_d = '0; + end else if(cnt_edge_q == '1) begin // stop counting to avoid overflow + cnt_edge_d = cnt_edge_q; end end - // clocked with falling edge, creates an active clk-gate around rising edge - `FF(ck_cnt_q, ck_cnt_d, '0, hyper_ck_ni); + // sampling on the rising edge requires counting on falling edges to create + // a window where the clk-gate is transparent around rising edge and vice versa + tc_clk_mux2 i_cnt_clk_mux ( + .clk0_i ( hyper_ck_ni ), + .clk1_i ( hyper_ck_i ), + .clk_sel_i ( ~cfg_edge_pol_i ), + .clk_o ( cnt_clk ) + ); - assign rwds_sample_ena = (ck_cnt_q == 2); // TODO: Check proper sampling point in sim + `FF(cnt_edge_q, cnt_edge_d, '0, cnt_clk); - // Gate the sampling of rwds to the third rising CK_90 edge only - tc_clk_gating i_rwds_in_clk_gate ( - .clk_i ( hyper_ck_i ), - .en_i ( rwds_sample_ena ), - .test_en_i ( test_mode_i ), - .clk_o ( rwds_sample_clk ) + // TODO: Check proper sampling point in sim + assign enable_sampling = (cnt_edge_q == cnt_target_value); + + tc_clk_mux2 i_sampling_clk_mux ( + .clk0_i ( hyper_ck_ni ), + .clk1_i ( hyper_ck_i ), + .clk_sel_i ( cfg_edge_pol_i ), + .clk_o ( 
sampling_clk ) + ); + + // gate the sampling of rwds to the correct rising clock edge + tc_clk_gating i_rwds_sample_rise_gate ( + .clk_i ( sampling_clk ), + .en_i ( enable_sampling ), + .test_en_i ( test_mode_i ), + .clk_o ( sampling_clk_gated ) ); - // Sample RWDS on demand for extra latency determination - `FF(rwds_sample_o, hyper_rwds_i, '0, rwds_sample_clk); + + // sample rwds exactly once at the correct edge + `FF(rwds_sample, hyper_rwds_i, '0, sampling_clk_gated); + + // pass rwds to phy-clock domain + `FF(rwds_sample_o, rwds_sample, '0, clk_i); endmodule diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index fc6772b..bb00782 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -13,11 +13,15 @@ module hyperbus_trx #( parameter int unsigned SyncStages = 2 )( // Global signals - input logic clk_i, - input logic clk_i_90, - input logic rst_ni, - input logic test_mode_i, - // Transciever control: facing controller + input logic clk_i, + input logic clk_i_90, + input logic rst_ni, + input logic test_mode_i, + + input logic [1:0] cfg_edge_idx_i, + input logic cfg_edge_pol_i, + + // Transceiver control: facing controller input logic [NumChips-1:0] cs_i, input logic cs_ena_i, output logic rwds_sample_o, @@ -36,7 +40,7 @@ module hyperbus_trx #( output logic [15:0] rx_data_o, output logic rx_valid_o, input logic rx_ready_i, - // Physical interace: facing HyperBus + // Physical interface: facing HyperBus output logic [NumChips-1:0] hyper_cs_no, output logic hyper_ck_o, output logic hyper_ck_no, @@ -136,17 +140,18 @@ module hyperbus_trx #( // RX // ======== - // guarantees proper worst-case sampling of RWDS + // sample RWDS for extra latency determination (adjustable sampling edge) hyperbus_rwds_sampler i_rwds_sampler ( .clk_i, .rst_ni, .test_mode_i, + .cfg_edge_idx_i, + .cfg_edge_pol_i, .rwds_sample_o, .hyper_cs_ni ( hyper_cs_no ), .hyper_ck_i ( hyper_ck_o ), .hyper_ck_ni ( hyper_ck_no ), - .hyper_rwds_i ( hyper_rwds_i ), - .hyper_rwds_oe_i ( ) + 
.hyper_rwds_i ( hyper_rwds_i ) ) // Set and Reset RX clock enable From 7c49294f5e397f881071dffa1b47f3d4a227e453 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 12/14] Delay additional latency decision, add cfg regs We want to give the RWDS sampler as much time as possible to get a value. So we delay the additional latency decision to the latest point possible. --- src/hyperbus_cfg_regs.sv | 4 ++- src/hyperbus_phy.sv | 78 +++++++++++++++++++++++++++------------- src/hyperbus_pkg.sv | 37 ++++++++++++------- src/hyperbus_trx.sv | 2 +- 4 files changed, 81 insertions(+), 40 deletions(-) diff --git a/src/hyperbus_cfg_regs.sv b/src/hyperbus_cfg_regs.sv index 70ac78e..680aa67 100644 --- a/src/hyperbus_cfg_regs.sv +++ b/src/hyperbus_cfg_regs.sv @@ -30,7 +30,7 @@ module hyperbus_cfg_regs #( `include "common_cells/registers.svh" // Internal Parameters - localparam int unsigned NumBaseRegs = 12; + localparam int unsigned NumBaseRegs = 13; localparam int unsigned NumRegs = 2*NumChips + NumBaseRegs; localparam int unsigned RegsBits = cf_math_pkg::idx_width(NumRegs); localparam int unsigned RegStrbWidth = RegDataWidth/8; @@ -59,6 +59,7 @@ module hyperbus_cfg_regs #( if (sel_reg_mapped) begin rfield = { crange_q, + reg_data_t'(cfg_q.rwds_sample_edge), reg_data_t'(cfg_q.csn_to_ck_cycles), reg_data_t'(cfg_q.t_csh_cycles), reg_data_t'(cfg_q.which_phy), @@ -101,6 +102,7 @@ module hyperbus_cfg_regs #( 'h9: cfg_d.which_phy = (NumPhys==1) ? 
0 : ( (~wmask & cfg_q.which_phy ) | (wmask & reg_req_i.wdata) ); 'ha: cfg_d.t_csh_cycles = (~wmask & cfg_q.t_csh_cycles ) | (wmask & reg_req_i.wdata); 'hb: cfg_d.csn_to_ck_cycles = (~wmask & cfg_q.csn_to_ck_cycles ) | (wmask & reg_req_i.wdata); + 'hc: cfg_d.rwds_sample_edge = (~wmask & cfg_q.rwds_sample_edge ) | (wmask & reg_req_i.wdata); default: begin {sel_chip, chip_reg} = sel_reg - NumBaseRegs; crange_d[sel_chip][chip_reg] = (~wmask & crange_q[sel_chip][chip_reg]) | (wmask & reg_req_i.wdata); diff --git a/src/hyperbus_phy.sv b/src/hyperbus_phy.sv index e9229fe..47e6420 100644 --- a/src/hyperbus_phy.sv +++ b/src/hyperbus_phy.sv @@ -123,22 +123,24 @@ module hyperbus_phy import hyperbus_pkg::*; #( .clk_i_90, .rst_ni, .test_mode_i, - .cs_i ( cs_q ), - .cs_ena_i ( trx_cs_ena ), - .rwds_sample_o ( trx_rwds_sample ), - .rwds_sample_ena_i ( trx_rwds_sample_ena ), - .tx_clk_delay_i ( cfg_i.t_tx_clk_delay ), - .tx_clk_ena_i ( trx_clk_ena ), - .tx_data_i ( trx_tx_data ), - .tx_data_oe_i ( trx_tx_data_oe ), - .tx_rwds_i ( trx_tx_rwds ), - .tx_rwds_oe_i ( trx_tx_rwds_oe ), - .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), - .rx_clk_set_i ( trx_rx_clk_set ), - .rx_clk_reset_i ( trx_rx_clk_reset ), - .rx_data_o ( trx_rx_data ), - .rx_valid_o ( trx_rx_valid ), - .rx_ready_i ( trx_rx_ready ), + .cfg_edge_idx_i ( cfg_i.rwds_sample_edge.cylce_idx ), + .cfg_edge_pol_i ( cfg_i.rwds_sample_edge.polarity ), + .cs_i ( cs_q ), + .cs_ena_i ( trx_cs_ena ), + .rwds_sample_o ( trx_rwds_sample ), + .rwds_sample_ena_i ( trx_rwds_sample_ena ), + .tx_clk_delay_i ( cfg_i.t_tx_clk_delay ), + .tx_clk_ena_i ( trx_clk_ena ), + .tx_data_i ( trx_tx_data ), + .tx_data_oe_i ( trx_tx_data_oe ), + .tx_rwds_i ( trx_tx_rwds ), + .tx_rwds_oe_i ( trx_tx_rwds_oe ), + .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), + .rx_clk_set_i ( trx_rx_clk_set ), + .rx_clk_reset_i ( trx_rx_clk_reset ), + .rx_data_o ( trx_rx_data ), + .rx_valid_o ( trx_rx_valid ), + .rx_ready_i ( trx_rx_ready ), .hyper_cs_no, .hyper_ck_o, .hyper_ck_no, 
@@ -271,7 +273,7 @@ module hyperbus_phy import hyperbus_pkg::*; #( cs_d = trans_cs_i; if(cfg_i.csn_to_ck_cycles != 0) begin - // asser CS but delay hyper_ck to allow more time + // assert CS but delay hyper_ck to allow more time // for memory to drive RWDS (to satisfy t_DSV) state_d = DelayCK; timer_d = cfg_i.csn_to_ck_cycles -1; @@ -305,7 +307,7 @@ module hyperbus_phy import hyperbus_pkg::*; #( timer_d = cfg_i.t_burst_max; state_d = Write; end else begin - timer_d = TimerWidth'(cfg_i.t_latency_access) << ctl_add_latency; + timer_d = TimerWidth'(cfg_i.t_latency_access); state_d = WaitLatAccess; end end @@ -313,18 +315,44 @@ module hyperbus_phy import hyperbus_pkg::*; #( WaitLatAccess: begin trx_clk_ena = 1'b1; trx_tx_data_oe = 1'b1; - // Substract cycle for last CA and another for state delay + // ctl_add_latency may arrive at any time (adjustable RWDS sampling) + // If no additional latency required: + if (~ctl_add_latency) begin + // Substract cycle for last CA and another for state delay + if(ctl_timer_two) begin + timer_d = cfg_i.t_burst_max; + // Switch to write or read phase and already start + // turnaround of tri-state driver (depending on latency + // config and if read or write transaction). + if (tf_q.write) begin + state_d = Write; + trx_tx_data_oe = 1'b1; + // For zero latency writes, we must not drive the RWDS + // signal (see specs page 9). Depending on the latency + // mode we thus drive only the DQ signals or DQ + RWDS. 
+ trx_tx_rwds_oe = ~ctl_write_zero_lat; + end else begin + state_d = Read; + trx_tx_data_oe = 1'b0; + trx_tx_rwds_oe = 1'b0; + end + end + end else if (ctl_timer_one) begin + // instead of going to 0, add another latency count + state_d = WaitAddLatAccess; + timer_d = TimerWidth'(cfg_i.t_latency_access); + end + end + WaitAddLatAccess: begin + // Same as WaitLatAccess but without possibility + // of adding another latency count + trx_clk_ena = 1'b1; + trx_tx_data_oe = 1'b1; if (ctl_timer_two) begin timer_d = cfg_i.t_burst_max; - // Switch to write or read phase and already start - // turnaround of tri-state driver (depending on latency - // config and if read or write transaction). if (tf_q.write) begin state_d = Write; trx_tx_data_oe = 1'b1; - // For zero latency writes, we must not drive the RWDS - // signal (see specs page 9). Depending on the latency - // mode we thus drive only the DQ signals or DQ + RWDS. trx_tx_rwds_oe = ~ctl_write_zero_lat; end else begin state_d = Read; diff --git a/src/hyperbus_pkg.sv b/src/hyperbus_pkg.sv index 31e0482..cf9126b 100644 --- a/src/hyperbus_pkg.sv +++ b/src/hyperbus_pkg.sv @@ -8,20 +8,27 @@ package hyperbus_pkg; localparam unsigned HyperBurstWidth = 8 + $clog2(1024/16) + 1; typedef logic [HyperBurstWidth-1:0] hyper_blen_t; + + typedef struct packed { + logic [1:0] cylce_idx; + logic polarity; + } hyper_cfg_rwds_t; + // configuration type typedef struct packed { - logic [3:0] t_latency_access; - logic en_latency_additional; - logic [15:0] t_burst_max; - logic [3:0] t_read_write_recovery; - logic [3:0] t_rx_clk_delay; - logic [3:0] t_tx_clk_delay; - logic [4:0] address_mask_msb; - logic address_space; - logic phys_in_use; - logic which_phy; - logic [3:0] t_csh_cycles; // add an configurable Tcsh for high freq operation(200MHz Hyperram) - logic [3:0] csn_to_ck_cycles; // delay hyper_ck after CS is asserted (more time for t_DSV) + logic [3:0] t_latency_access; + logic en_latency_additional; + logic [15:0] t_burst_max; + logic 
[3:0] t_read_write_recovery; + logic [3:0] t_rx_clk_delay; + logic [3:0] t_tx_clk_delay; + logic [4:0] address_mask_msb; + logic address_space; + logic phys_in_use; + logic which_phy; + logic [3:0] t_csh_cycles; // add an configurable Tcsh for high freq operation(200MHz Hyperram) + logic [3:0] csn_to_ck_cycles; // delay hyper_ck after CS is asserted (more time for t_DSV) + hyper_cfg_rwds_t rwds_sample_edge; } hyper_cfg_t; typedef struct packed { @@ -44,6 +51,7 @@ package hyperbus_pkg; DelayCK, SendCA, WaitLatAccess, + WaitAddLatAccess, Read, Write, WaitXfer, @@ -77,7 +85,10 @@ package hyperbus_pkg; phys_in_use: NumPhys-1, which_phy: NumPhys-1, t_csh_cycles: 'h1, - csn_to_ck_cycles: 'h2 + csn_to_ck_cycles: 'h2, + rwds_sample_edge: hyper_cfg_rwds_t'{ // third rising edge, see hyperbus_rwds_sampler + cylce_idx: 'h1, + polarity: 'b1 } }; return cfg; diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index bb00782..0a15286 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -152,7 +152,7 @@ module hyperbus_trx #( .hyper_ck_i ( hyper_ck_o ), .hyper_ck_ni ( hyper_ck_no ), .hyper_rwds_i ( hyper_rwds_i ) - ) + ); // Set and Reset RX clock enable always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ff_rx_delay From 8c986b50e899a51a6ee32c558d8f5d920eee0df9 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 13/14] Fix reset of rwds sampling counter The reset was not being triggered since the gated clock stops before chip select goes high. A sticky bit driven by the ungated clock is used to indicate start of transfer. The counter reaching the target value is used to reset the sticky bit. Counter only counts while it is set (when the transfer starts until the target is reached). 
--- src/hyperbus_rwds_sampler.sv | 41 +++++++++++++++++++++++++----------- src/hyperbus_trx.sv | 3 ++- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/hyperbus_rwds_sampler.sv b/src/hyperbus_rwds_sampler.sv index b5f1a7d..01dac40 100644 --- a/src/hyperbus_rwds_sampler.sv +++ b/src/hyperbus_rwds_sampler.sv @@ -37,7 +37,10 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() // sampled value going to PHY-FSM output logic rwds_sample_o, - // Physical interface: facing HyperBus + // ungated hyperbus clock + input logic tx_clk_90_i, + + // physical HyperBus signals input logic hyper_cs_ni, input logic hyper_ck_i, input logic hyper_ck_ni, @@ -46,22 +49,24 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() // used to time the sampling of RWDS to determine additional latency logic [2:0] cnt_edge_d, cnt_edge_q; // one bit larger than config + logic start_of_tf_d, start_of_tf_q; // start of transfer indicator logic [2:0] cnt_target_value; + logic cnt_at_target; logic cnt_clk; // clock used for edge counting logic sampling_clk, sampling_clk_gated; // clock used for sampling - logic enable_sampling; + logic enable_sampling; // sampling clock gate enable logic rwds_sample; assign cnt_target_value = cfg_edge_idx_i + 1; + assign cnt_at_target = (cnt_target_value == cnt_edge_q); always_comb begin : gen_edge_cnt - cnt_edge_d = cnt_edge_q +1; // count hyper_ck(_n) edges - - // reset counter when the transaction ends (CS goes high) - if(hyper_cs_ni) begin - cnt_edge_d = '0; - end else if(cnt_edge_q == '1) begin // stop counting to avoid overflow - cnt_edge_d = cnt_edge_q; + // only count at the start of a transfer + if(start_of_tf_q) begin + cnt_edge_d = cnt_edge_q +1; // count hyper_ck(_n) edges + end else begin + // reset counter for next start of transfer + cnt_edge_d = 1'b0; end end // sampling on the rising edge requires counting on falling edges to create @@ -75,8 +80,20 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() 
`FF(cnt_edge_q, cnt_edge_d, '0, cnt_clk); + // used to reset counter and ensure clock gate opens only once + // clocked with ungated clock to detect cs_n going high + always_comb begin : gen_start_of_transfer + start_of_tf_d = start_of_tf_q; + if(hyper_cs_ni) begin + start_of_tf_d = 1'b1; + end else if (cnt_at_target) begin + start_of_tf_d = 1'b0; + end + end + `FF(start_of_tf_q, start_of_tf_d, '0, tx_clk_90_i); + // TODO: Check proper sampling point in sim - assign enable_sampling = (cnt_edge_q == cnt_target_value); + assign enable_sampling = (cnt_at_target && start_of_tf_q); tc_clk_mux2 i_sampling_clk_mux ( .clk0_i ( hyper_ck_ni ), @@ -85,7 +102,7 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() .clk_o ( sampling_clk ) ); - // gate the sampling of rwds to the correct rising clock edge + // gate the sampling of rwds to the selected clock edge tc_clk_gating i_rwds_sample_rise_gate ( .clk_i ( sampling_clk ), .en_i ( enable_sampling ), @@ -93,7 +110,7 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() .clk_o ( sampling_clk_gated ) ); - // sample rwds exactly once at the correct edge + // sample rwds exactly once using gated clock `FF(rwds_sample, hyper_rwds_i, '0, sampling_clk_gated); // pass rwds to phy-clock domain diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index 0a15286..f586774 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -148,7 +148,8 @@ module hyperbus_trx #( .cfg_edge_idx_i, .cfg_edge_pol_i, .rwds_sample_o, - .hyper_cs_ni ( hyper_cs_no ), + .tx_clk_90_i ( tx_clk_90 ), + .hyper_cs_ni ( &hyper_cs_no ), .hyper_ck_i ( hyper_ck_o ), .hyper_ck_ni ( hyper_ck_no ), .hyper_rwds_i ( hyper_rwds_i ) From 1983836b2405aee2a6f0128ef1b400b765758cbe Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Tue, 28 Oct 2025 17:09:06 +0100 Subject: [PATCH 14/14] Sample RWDS relative to CS edge not clock start --- src/hyperbus_cfg_regs.sv | 4 +- src/hyperbus_phy.sv | 38 ++++++++---------- src/hyperbus_pkg.sv | 14 +++---- 
src/hyperbus_rwds_sampler.sv | 78 +++++++++++++++--------------------- src/hyperbus_trx.sv | 15 ++----- 5 files changed, 62 insertions(+), 87 deletions(-) diff --git a/src/hyperbus_cfg_regs.sv b/src/hyperbus_cfg_regs.sv index 680aa67..bafb747 100644 --- a/src/hyperbus_cfg_regs.sv +++ b/src/hyperbus_cfg_regs.sv @@ -59,7 +59,7 @@ module hyperbus_cfg_regs #( if (sel_reg_mapped) begin rfield = { crange_q, - reg_data_t'(cfg_q.rwds_sample_edge), + reg_data_t'(cfg_q.rwds_sample), reg_data_t'(cfg_q.csn_to_ck_cycles), reg_data_t'(cfg_q.t_csh_cycles), reg_data_t'(cfg_q.which_phy), @@ -102,7 +102,7 @@ module hyperbus_cfg_regs #( 'h9: cfg_d.which_phy = (NumPhys==1) ? 0 : ( (~wmask & cfg_q.which_phy ) | (wmask & reg_req_i.wdata) ); 'ha: cfg_d.t_csh_cycles = (~wmask & cfg_q.t_csh_cycles ) | (wmask & reg_req_i.wdata); 'hb: cfg_d.csn_to_ck_cycles = (~wmask & cfg_q.csn_to_ck_cycles ) | (wmask & reg_req_i.wdata); - 'hc: cfg_d.rwds_sample_edge = (~wmask & cfg_q.rwds_sample_edge ) | (wmask & reg_req_i.wdata); + 'hc: cfg_d.rwds_sample = (~wmask & cfg_q.rwds_sample ) | (wmask & reg_req_i.wdata); default: begin {sel_chip, chip_reg} = sel_reg - NumBaseRegs; crange_d[sel_chip][chip_reg] = (~wmask & crange_q[sel_chip][chip_reg]) | (wmask & reg_req_i.wdata); diff --git a/src/hyperbus_phy.sv b/src/hyperbus_phy.sv index 47e6420..43e4e57 100644 --- a/src/hyperbus_phy.sv +++ b/src/hyperbus_phy.sv @@ -98,7 +98,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( logic trx_clk_ena; logic trx_cs_ena; logic trx_rwds_sample; - logic trx_rwds_sample_ena; logic [15:0] trx_tx_data; logic trx_tx_data_oe; logic [1:0] trx_tx_rwds; @@ -123,24 +122,23 @@ module hyperbus_phy import hyperbus_pkg::*; #( .clk_i_90, .rst_ni, .test_mode_i, - .cfg_edge_idx_i ( cfg_i.rwds_sample_edge.cylce_idx ), - .cfg_edge_pol_i ( cfg_i.rwds_sample_edge.polarity ), - .cs_i ( cs_q ), - .cs_ena_i ( trx_cs_ena ), - .rwds_sample_o ( trx_rwds_sample ), - .rwds_sample_ena_i ( trx_rwds_sample_ena ), - .tx_clk_delay_i ( 
cfg_i.t_tx_clk_delay ), - .tx_clk_ena_i ( trx_clk_ena ), - .tx_data_i ( trx_tx_data ), - .tx_data_oe_i ( trx_tx_data_oe ), - .tx_rwds_i ( trx_tx_rwds ), - .tx_rwds_oe_i ( trx_tx_rwds_oe ), - .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), - .rx_clk_set_i ( trx_rx_clk_set ), - .rx_clk_reset_i ( trx_rx_clk_reset ), - .rx_data_o ( trx_rx_data ), - .rx_valid_o ( trx_rx_valid ), - .rx_ready_i ( trx_rx_ready ), + .cfg_edge_idx_i ( cfg_i.rwds_sample.cylce_idx ), + .cfg_edge_pol_i ( cfg_i.rwds_sample.polarity ), + .cs_i ( cs_q ), + .cs_ena_i ( trx_cs_ena ), + .rwds_sample_o ( trx_rwds_sample ), + .tx_clk_delay_i ( cfg_i.t_tx_clk_delay ), + .tx_clk_ena_i ( trx_clk_ena ), + .tx_data_i ( trx_tx_data ), + .tx_data_oe_i ( trx_tx_data_oe ), + .tx_rwds_i ( trx_tx_rwds ), + .tx_rwds_oe_i ( trx_tx_rwds_oe ), + .rx_clk_delay_i ( cfg_i.t_rx_clk_delay ), + .rx_clk_set_i ( trx_rx_clk_set ), + .rx_clk_reset_i ( trx_rx_clk_reset ), + .rx_data_o ( trx_rx_data ), + .rx_valid_o ( trx_rx_valid ), + .rx_ready_i ( trx_rx_ready ), .hyper_cs_no, .hyper_ck_o, .hyper_ck_no, @@ -245,7 +243,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( trx_cs_ena = 1'b1; trx_clk_ena = 1'b0; trx_rx_clk_set = 1'b0; - trx_rwds_sample_ena = 1'b0; // Default next state state_d = state_q; timer_d = timer_q - 1; @@ -301,7 +298,6 @@ module hyperbus_phy import hyperbus_pkg::*; #( // Dataflow handled outside FSM trx_clk_ena = 1'b1; trx_tx_data_oe = 1'b1; - trx_rwds_sample_ena = ~ctl_write_zero_lat; if (ctl_timer_zero) begin if (ctl_write_zero_lat) begin timer_d = cfg_i.t_burst_max; diff --git a/src/hyperbus_pkg.sv b/src/hyperbus_pkg.sv index cf9126b..e1f90e6 100644 --- a/src/hyperbus_pkg.sv +++ b/src/hyperbus_pkg.sv @@ -10,8 +10,8 @@ package hyperbus_pkg; typedef struct packed { - logic [1:0] cylce_idx; - logic polarity; + logic [3:0] cylce_idx; // number of cycles passed when sampling should occur + logic polarity; // 1: rising, 0: falling } hyper_cfg_rwds_t; // configuration type @@ -28,7 +28,7 @@ package hyperbus_pkg; 
logic which_phy; logic [3:0] t_csh_cycles; // add an configurable Tcsh for high freq operation(200MHz Hyperram) logic [3:0] csn_to_ck_cycles; // delay hyper_ck after CS is asserted (more time for t_DSV) - hyper_cfg_rwds_t rwds_sample_edge; + hyper_cfg_rwds_t rwds_sample; } hyper_cfg_t; typedef struct packed { @@ -85,10 +85,10 @@ package hyperbus_pkg; phys_in_use: NumPhys-1, which_phy: NumPhys-1, t_csh_cycles: 'h1, - csn_to_ck_cycles: 'h2, - rwds_sample_edge: hyper_cfg_rwds_t'{ // third rising edge, see hyperbus_rwds_sampler - cylce_idx: 'h1, - polarity: 'b1 } + csn_to_ck_cycles: 'h0, // additional cycles from CS_N going low to start of hyper_ck + rwds_sample: hyper_cfg_rwds_t'{ // hyper_ck edge for RWDS sampling relative to CS_N going low + cylce_idx: 'h2, // cycle number after CS_N going low (first falling and rising edge is idx=0) + polarity: 'b1 } // 0: falling, 1:rising -> first edge after CS_N is a falling edge }; return cfg; diff --git a/src/hyperbus_rwds_sampler.sv b/src/hyperbus_rwds_sampler.sv index 01dac40..3c6927b 100644 --- a/src/hyperbus_rwds_sampler.sv +++ b/src/hyperbus_rwds_sampler.sv @@ -31,76 +31,62 @@ module hyperbus_rwds_sampler import hyperbus_pkg::*; #() input logic rst_ni, input logic test_mode_i, - input logic [1:0] cfg_edge_idx_i, // #edge where rwds is sampled + input logic [3:0] cfg_edge_idx_i, // #edge where rwds is sampled input logic cfg_edge_pol_i, // 1: rising, 0: falling // sampled value going to PHY-FSM output logic rwds_sample_o, - // ungated hyperbus clock - input logic tx_clk_90_i, - // physical HyperBus signals input logic hyper_cs_ni, - input logic hyper_ck_i, - input logic hyper_ck_ni, input logic hyper_rwds_i ); // used to time the sampling of RWDS to determine additional latency - logic [2:0] cnt_edge_d, cnt_edge_q; // one bit larger than config - logic start_of_tf_d, start_of_tf_q; // start of transfer indicator - logic [2:0] cnt_target_value; - logic cnt_at_target; + logic tx_clk_180; // inverted clock + logic [4:0] 
cnt_edge_d, cnt_edge_q; // one bit larger than config logic cnt_clk; // clock used for edge counting logic sampling_clk, sampling_clk_gated; // clock used for sampling logic enable_sampling; // sampling clock gate enable logic rwds_sample; - assign cnt_target_value = cfg_edge_idx_i + 1; - assign cnt_at_target = (cnt_target_value == cnt_edge_q); - + // generate and select clocks + // Sampling is either clocked by un-inverted or inverted 90deg hyperbus clock + // Counter is clocked by the inverse as it controls the clock gate + // which should be on for one cycle with sampling edge in the middle + tc_clk_inverter i_tx_clk_inv ( + .clk_i ( clk_i ), + .clk_o ( tx_clk_180 ) + ); + + tc_clk_mux2 i_sampling_clk_mux ( + .clk0_i ( tx_clk_180 ), + .clk1_i ( clk_i ), + .clk_sel_i ( cfg_edge_pol_i ), + .clk_o ( sampling_clk ) + ); + + tc_clk_inverter i_edge_cnt_clk_inv ( + .clk_i ( sampling_clk ), + .clk_o ( cnt_clk ) + ); + always_comb begin : gen_edge_cnt - // only count at the start of a transfer - if(start_of_tf_q) begin - cnt_edge_d = cnt_edge_q +1; // count hyper_ck(_n) edges + // only count during transfers + if(~hyper_cs_ni) begin + cnt_edge_d = cnt_edge_q +1; + if(cnt_edge_q == '1) begin + cnt_edge_d = cnt_edge_q; // saturating counter + end end else begin - // reset counter for next start of transfer + // reset counter for next transfer cnt_edge_d = 1'b0; end end - // sampling on the rising edge requires counting on falling edges to create - // a window where the clk-gate is transparent around rising edge and vice versa - tc_clk_mux2 i_cnt_clk_mux ( - .clk0_i ( hyper_ck_ni ), - .clk1_i ( hyper_ck_i ), - .clk_sel_i ( ~cfg_edge_pol_i ), - .clk_o ( cnt_clk ) - ); `FF(cnt_edge_q, cnt_edge_d, '0, cnt_clk); - // used to reset counter and ensure clock gate opens only once - // clocked with ungated clock to detect cs_n going high - always_comb begin : gen_start_of_transfer - start_of_tf_d = start_of_tf_q; - if(hyper_cs_ni) begin - start_of_tf_d = 1'b1; - end else if 
(cnt_at_target) begin - start_of_tf_d = 1'b0; - end - end - `FF(start_of_tf_q, start_of_tf_d, '0, tx_clk_90_i); - - // TODO: Check proper sampling point in sim - assign enable_sampling = (cnt_at_target && start_of_tf_q); - - tc_clk_mux2 i_sampling_clk_mux ( - .clk0_i ( hyper_ck_ni ), - .clk1_i ( hyper_ck_i ), - .clk_sel_i ( cfg_edge_pol_i ), - .clk_o ( sampling_clk ) - ); + assign enable_sampling = (cnt_edge_q == cfg_edge_idx_i) & ~hyper_cs_ni; // gate the sampling of rwds to the selected clock edge tc_clk_gating i_rwds_sample_rise_gate ( diff --git a/src/hyperbus_trx.sv b/src/hyperbus_trx.sv index f586774..098935b 100644 --- a/src/hyperbus_trx.sv +++ b/src/hyperbus_trx.sv @@ -18,14 +18,13 @@ module hyperbus_trx #( input logic rst_ni, input logic test_mode_i, - input logic [1:0] cfg_edge_idx_i, + input logic [3:0] cfg_edge_idx_i, input logic cfg_edge_pol_i, // Transceiver control: facing controller input logic [NumChips-1:0] cs_i, input logic cs_ena_i, output logic rwds_sample_o, - input logic rwds_sample_ena_i, input logic [3:0] tx_clk_delay_i, input logic tx_clk_ena_i, @@ -69,11 +68,6 @@ module hyperbus_trx #( logic rx_rwds_fifo_valid; logic rx_rwds_fifo_ready; - // used to time the sampling of RWDS to determine additional latency - logic [2:0] ck_cnt_d, ck_cnt_q; // TODO: check in sim if this can be one less - logic rwds_sample_ena; - logic rwds_sample_clk; - // Feed through async reset assign hyper_reset_no = rst_ni; @@ -85,6 +79,8 @@ module hyperbus_trx #( assign tx_clk_90 = clk_i_90; // 90deg-shifted differential output clock, sampling output bytes centrally + // TODO: tx_clk_ena_q to tx_clk_90 may need a constraint at the pins of this module + // specifically tx_clk_ena_q must arrive BEFORE tx_clk_90 otherwise the gating may fail hyperbus_clock_diff_out i_clock_diff_out ( .in_i ( tx_clk_90 ), .en_i ( tx_clk_ena_q ), @@ -93,7 +89,7 @@ module hyperbus_trx #( ); // Synchronize output chip select to shifted differential output clock - always_ff @(posedge 
tx_clk_90 or negedge rst_ni) begin : proc_ff_tx_shift90 + always_ff @(negedge clk_i or negedge rst_ni) begin : proc_ff_tx_shift90 if (~rst_ni) hyper_cs_no <= '1; else hyper_cs_no <= cs_ena_i ? ~cs_i : '1; end @@ -148,10 +144,7 @@ module hyperbus_trx #( .cfg_edge_idx_i, .cfg_edge_pol_i, .rwds_sample_o, - .tx_clk_90_i ( tx_clk_90 ), .hyper_cs_ni ( &hyper_cs_no ), - .hyper_ck_i ( hyper_ck_o ), - .hyper_ck_ni ( hyper_ck_no ), .hyper_rwds_i ( hyper_rwds_i ) );