Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

TMU prefetch: cache data memory. Prefetch complete but untested.

  • Loading branch information...
commit 70d9ba910c0637466a77050c4f90da77222ecb95 1 parent 926a3bd
@sbourdeauducq sbourdeauducq authored
View
176 cores/tmu2/rtl/tmu2_datamem.v
@@ -15,18 +15,18 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-module tmu2_datamem#(
+module tmu2_datamem #(
parameter cache_depth = 13,
parameter fml_depth = 26
) (
input sys_clk,
input sys_rst,
- output busy,
+ output reg busy,
/* from fragment FIFO */
input frag_pipe_stb_i,
- output frag_pipe_ack_o,
+ output reg frag_pipe_ack_o,
input [fml_depth-1-1:0] frag_dadr,
input [cache_depth-1:0] frag_tadra, /* < texel cache addresses (in bytes) */
input [cache_depth-1:0] frag_tadrb,
@@ -41,19 +41,181 @@ module tmu2_datamem#(
/* from fetch unit */
input fetch_pipe_stb_i,
- output fetch_pipe_ack_o,
+ output reg fetch_pipe_ack_o,
input [255:0] fetch_dat,
/* to downstream pipeline */
output reg pipe_stb_o,
input pipe_ack_i,
- output [fml_depth-1-1:0] dadr_f, /* in 16-bit words */
+ output reg [fml_depth-1-1:0] dadr_f, /* in 16-bit words */
output [15:0] tcolora,
output [15:0] tcolorb,
output [15:0] tcolorc,
output [15:0] tcolord,
- output [5:0] x_frac_f,
- output [5:0] y_frac_f
+ output reg [5:0] x_frac_f,
+ output reg [5:0] y_frac_f
);
+reg req_ce;
+reg req_valid;
+reg [cache_depth-1:0] frag_tadra_r;
+reg [cache_depth-1:0] frag_tadrb_r;
+reg [cache_depth-1:0] frag_tadrc_r;
+reg [cache_depth-1:0] frag_tadrd_r;
+reg frag_miss_a_r;
+reg frag_miss_b_r;
+reg frag_miss_c_r;
+reg frag_miss_d_r;
+always @(posedge sys_clk) begin
+ if(req_ce) begin
+ req_valid <= pipe_stb_i;
+
+ frag_tadra_r <= frag_tadra;
+ frag_tadrb_r <= frag_tadrb;
+ frag_tadrc_r <= frag_tadrc;
+ frag_tadrd_r <= frag_tadrd;
+ frag_miss_a_r <= frag_miss_a;
+ frag_miss_b_r <= frag_miss_b;
+ frag_miss_c_r <= frag_miss_c;
+ frag_miss_d_r <= frag_miss_d;
+
+ dadr_f <= frag_dadr;
+ x_frac_f <= frag_x_frac;
+ y_frac_f <= frag_y_frac;
+ end
+end
+
+reg retry;
+wire adra = retry ? frag_tadra_r : frag_tadra;
+wire adrb = retry ? frag_tadrb_r : frag_tadrb;
+wire adrc = retry ? frag_tadrc_r : frag_tadrc;
+wire adrd = retry ? frag_tadrd_r : frag_tadrd;
+
+reg [1:0] wa_sel;
+reg [cache_depth-1:0] wa;
+always @(*) begin
+ case(wa_sel)
+ 2'd0: wa = frag_tadra_r;
+ 2'd1: wa = frag_tadrb_r;
+ 2'd2: wa = frag_tadrc_r;
+ default: wa = frag_tadrd_r;
+ endcase
+end
+
+reg we;
+tmu2_qpram #(
+ .depth(cache_depth)
+) qpram (
+ .sys_clk(sys_clk),
+
+ .raa(adra),
+ .rda(tcolora),
+ .rab(adrb),
+ .rdb(tcolorb),
+ .rac(adrc),
+ .rdc(tcolorc),
+ .rad(adrd),
+ .rdd(tcolord),
+
+ .we(we),
+ .wa(wa),
+ .wd(fetch_dat)
+);
+
+reg [3:0] missmask;
+reg missmask_init;
+reg missmask_we;
+always @(posedge sys_clk) begin
+ if(missmask_init)
+ missmask <= 4'b1111;
+ if(missmask_we) begin
+ case(tag_sel)
+ 2'd0: missmask <= missmask & 4'b1110;
+ 2'd1: missmask <= missmask & 4'b1101;
+ 2'd2: missmask <= missmask & 4'b1011;
+ default: missmask <= missmask & 4'b0111;
+ endcase
+ end
+end
+
+reg [1:0] state;
+reg [1:0] next_state;
+
+parameter RUNNING = 2'd0;
+parameter COMMIT = 2'd1;
+parameter STROBE = 2'd2;
+
+always @(posedge sys_clk) begin
+ if(sys_rst)
+ state <= RUNNING;
+ else
+ state <= next_state;
+end
+
+always @(*) begin
+ next_state = state;
+
+ busy = 1'b0;
+ frag_pipe_ack_o = 1'b0;
+ fetch_pipe_ack_o = 1'b0;
+ pipe_stb_o = 1'b0;
+
+ req_ce = 1'b0;
+ retry = 1'b0;
+ wa_sel = 2'd0;
+ we = 1'b0;
+ missmask_init = 1'b0;
+ missmask_we = 1'b0;
+
+ case(state)
+ RUNNING: begin
+ frag_pipe_ack_o = 1'b1;
+ req_ce = 1'b1;
+ missmask_init = 1'b1;
+ if(req_valid) begin
+ pipe_stb_o = 1'b1;
+ if(frag_miss_a_r | frag_miss_b_r | frag_miss_c_r | frag_miss_d_r) begin
+ frag_pipe_ack_o = 1'b0;
+ req_ce = 1'b0;
+ pipe_stb_o = 1'b0;
+ next_state = COMMIT;
+ end else if(~pipe_ack_i) begin
+ frag_pipe_ack_o = 1'b0;
+ req_ce = 1'b0;
+ retry = 1'b1;
+ end
+ end
+ end
+ COMMIT: begin
+ retry = 1'b1;
+ if((~frag_miss_a_r | ~missmask[0]) & (~frag_miss_b_r | ~missmask[1]) & (~frag_miss_c_r | ~missmask[2]) & (~frag_miss_d_r | ~missmask[3]))
+ next_state = STROBE;
+ else begin
+ fetch_pipe_ack_o = 1'b1;
+ if(fetch_pipe_stb_i) begin
+ if(frag_miss_a_r & missmask[0])
+ wa_sel = 2'd0;
+ else if(frag_miss_b_r & missmask[1])
+ wa_sel = 2'd0;
+ else if(frag_miss_c_r & missmask[2])
+ wa_sel = 2'd2;
+ else
+ wa_sel = 2'd3;
+ missmask_we = 1'b1;
+ we = 1'b1;
+ end
+ end
+ end
+ STROBE: begin
+ retry = 1'b1;
+ pipe_stb_o = 1'b1;
+ if(pipe_ack_i) begin
+ retry = 1'b0;
+ req_ce = 1'b1;
+ next_state = RUNNING;
+ end
+ end
+ endcase
+end
+
endmodule
View
5 cores/tmu2/rtl/tmu2_qpram.v
@@ -20,7 +20,6 @@ module tmu2_qpram #(
) (
input sys_clk,
- input re,
input [depth-1:0] raa, /* < in bytes, 16-bit aligned */
output reg [15:0] rda,
input [depth-1:0] rab,
@@ -89,13 +88,11 @@ tmu2_tdpram #(
.a(we ? {wa[depth-1:5], 2'b00} : raa[depth-1:3]),
.we(we),
- .re(re),
.di(wd[255:192]),
.do(rd64a),
.a2(we ? {wa[depth-1:5], 2'b01} : rab[depth-1:3]),
.we2(we),
- .re2(re),
.di2(wd[191:128]),
.do2(rd64b)
);
@@ -108,13 +105,11 @@ tmu2_tdpram #(
.a(we ? {wa[depth-1:5], 2'b10} : rac[depth-1:3]),
.we(we),
- .re(re),
.di(wd[127:64]),
.do(rd64c),
.a2(we ? {wa[depth-1:5], 2'b11} : rad[depth-1:3]),
.we2(we),
- .re2(re),
.di2(wd[63:0]),
.do2(rd64d)
);
View
8 cores/tmu2/rtl/tmu2_tdpram.v
@@ -25,13 +25,11 @@ module tmu2_tdpram #(
input [depth-1:0] a,
input we,
- input re,
input [width-1:0] di,
output reg [width-1:0] do,
input [depth-1:0] a2,
input we2,
- input re2,
input [width-1:0] di2,
output reg [width-1:0] do2
);
@@ -42,15 +40,13 @@ reg [width-1:0] ram[0:(1 << depth)-1];
always @(posedge sys_clk) begin
if(we)
ram[a] <= di;
- if(re)
- do <= ram[a];
+ do <= ram[a];
end
always @(posedge sys_clk) begin
if(we2)
ram[a2] <= di2;
- if(re2)
- do2 <= ram[a2];
+ do2 <= ram[a2];
end
// synthesis translate_off
Please sign in to comment.
Something went wrong with that request. Please try again.