Skip to content

Commit

Permalink
Memory cache removed, SRAM module reimplemented
Browse files Browse the repository at this point in the history
I've removed the cache, which required reimplementing the 8-bit SRAM controller built into the ZXUno.

The core no longer works, and I still don't know why. I have simulated the new SRAM module for both writes and reads (PDFs attached); the simulation results show no errors in the signal timing or in the values of din/dout, sram_a and sram_d.

@gyurco, do you have any ideas?
  • Loading branch information
spark2k06 committed Jan 23, 2022
1 parent 4f495fb commit a7f1293
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 175 deletions.
179 changes: 83 additions & 96 deletions HW/common/sram.v
@@ -1,103 +1,90 @@
//////////////////////////////////////////////////////////////////////////////////
//
// Filename: sram.v
// Description: SRAM 8-bit controller for the Next186 SoC PC project,
// Version 1.0
// Creation date: Dec2016
//
// Author: DistWave
//
// Based on SDRAM 16-bit controller from the Next186 SoC PC project by Nicolae Dumitrache
/////////////////////////////////////////////////////////////////////////////////
`timescale 1ns / 1ps

// NOTE(review): this listing contains two `module SRAM_8bit` headers and two
// `endmodule` lines. It appears to interleave the previous burst controller
// (sys_* ports, STATE/RET/DLY registers) with the new CPU-facing controller
// (addr/dout/din/mreq/wmask/ce ports, status/s_byte registers) — confirm
// against the checked-in sram.v before relying on any single line.
module SRAM_8bit(
input sys_CLK, // clock
input [1:0]sys_CMD, // 00=nop, 01=write 256 bytes, 11=read 256 bytes
input [18:0]sys_ADDR, // word address, multiple of 2 words (4 bytes)
input [15:0]sys_DIN, // data input
output reg [15:0]sys_DOUT,
output reg sys_rd_data_valid = 0, // data valid out
output reg sys_wr_data_valid = 0, // data valid in

input sram_clk,
output sram_n_WE, // SRAM #WE
output reg [20:0]sram_ADDR, // SRAM address
inout [7:0]sram_DATA // SRAM data
);
// Port list using the CPU-side names: a 32-bit word is transferred through
// the 8-bit sram_d bus one byte at a time (see the case(status) block below).
module SRAM_8bit (
input wire [20:0] addr, // IN [20:0] -> SRAM address [20:0] (up to 2MByte)
output reg [31:0] dout, // OUT [31:0] -> DWORD from SRAM
input wire [31:0] din, // IN [31:0] -> DWORD to SRAM
input wire clk, // IN -> CPU Clock
input wire mreq, // IN -> Read/Write request
input wire [3:0] wmask, // IN [3:0] -> Byte to write
output wire ce, // OUT -> To tell the Next186 CPU module to wait for read and/or write operations to complete

// Registers and continuous assigns that use the sys_*/sram_ADDR/sram_DATA names.
reg [2:0]STATE = 0;
reg [2:0]RET; // return state
reg [6:0]DLY; // delay
reg [1:0]sys_cmd_ack = 0; // command acknowledged
reg [15:0]reg_din;
reg [5:0]out_data_valid = 0;

// Drives the low or high byte of reg_din, selected by sram_ADDR[0], while
// out_data_valid[2] is set; otherwise the bus is released and #WE is high.
assign sram_DATA = out_data_valid[2] ? sram_ADDR[0] ? reg_din[15:8] : reg_din[7:0] : 8'hzz;
assign sram_n_WE = out_data_valid[2] ? 0 : 1;

reg [7:0]sram_data2;
output reg [20:0] sram_a, // OUT -> SRAM address [20:0]
inout wire [7:0] sram_d, // IN/OUT -> SRAM data [7:0]
output reg sram_we_n = 1 // OUT -> SRAM #WE (1 bit)
);


reg [2:0] s_byte = 0; // selects which byte lane of dout/din is being transferred
reg [3:0] status = 0; // FSM state: 0 = idle, 1 = read, 2 = write (assert #WE), 3 = write (release #WE)
reg [7:0] s_din; // byte driven onto sram_d while sram_we_n is low
reg s_mreq = 0; // set when a request is accepted in state 0, cleared when the transfer ends
wire wr = |wmask; // any mask bit set selects the write path
// ce is high only while the FSM is in state 0 and no request is latched.
assign ce = (status == 0) && ~s_mreq;
// The data bus is driven only while #WE is low; otherwise it is high-Z so the SRAM can drive it.
assign sram_d = sram_we_n ? 8'hZZ : s_din;

// Address generation on sram_clk, using the sys_*/STATE names.
always @(posedge sram_clk) begin
sram_data2 <= sram_DATA;
case(STATE)
0: begin
if(|sys_CMD) begin
sram_ADDR <= {sys_ADDR[18:0], 2'b00};
end
end
1: begin
if ((sys_rd_data_valid == 1'b1) || (out_data_valid[2] == 1'b1)) begin
sram_ADDR <= sram_ADDR + 1'b1;
end
end
7: begin
if(sys_cmd_ack[1]) begin
sram_ADDR <= sram_ADDR + 1;
end
end
endcase
end


// NOTE(review): this sys_CLK block is not closed in this listing before the
// `always @(posedge clk)` below starts — consistent with two file versions
// being interleaved; confirm.
always @(posedge sys_CLK) begin
STATE <= 1;
reg_din <= sys_DIN;
out_data_valid <= {out_data_valid[1:0], sys_wr_data_valid};
DLY <= DLY - 1;
sys_DOUT <= {sram_DATA, sram_data2};

case(STATE)
0: begin
sys_rd_data_valid <= 1'b0;
if(|sys_CMD) begin
sys_cmd_ack <= sys_CMD;
STATE <= 5;
end
else begin
sys_cmd_ack <= 2'b00;
STATE <= 0;
// Byte-serial transfer FSM clocked on clk.
// NOTE(review): s_mreq, s_din and sram_we_n are written with blocking `=`
// while the other registers use `<=`; ce and sram_d are derived from them by
// continuous assigns, so simulation ordering against other posedge-clk logic
// may not match hardware — confirm whether `<=` was intended.
always @(posedge clk) begin
case(status)
// State 0 (idle): accept a request and latch the start address.
0: begin
if (mreq) begin
s_mreq = 1;
// addr[1:0] is discarded, so the transfer always starts on a 4-byte boundary.
// NOTE(review): assumes the caller only issues dword-aligned accesses — TODO confirm.
sram_a <= {addr[20:2], 2'b00};
s_byte <= 0;
if (wr)
status <= 2;
else status <= 1;
end

end
// State 1 (read): capture one byte per clk into dout and advance the address.
// NOTE(review): each byte is sampled on the clk edge right after the edge that
// registered its address; this assumes SRAM access time plus I/O delays fit in
// one clk period — confirm against the SRAM datasheet and the clk frequency.
1: begin
sram_a <= sram_a + 1;
s_byte <= s_byte + 1;
case(s_byte)
0:
dout[7:0] <= sram_d;
1:
dout[15:8] <= sram_d;
2:
dout[23:16] <= sram_d;
3: begin
dout[31:24] <= sram_d;
s_mreq = 0;
status <= 0;
end
endcase
end
// State 2 (write, first half): present byte s_byte of din and pull #WE low
// only if the matching wmask bit is set.
2: begin
status <= 3;
case(s_byte)
0: begin
s_din = din[7:0];
sram_we_n = ~wmask[0];
end
1: begin
s_din = din[15:8];
sram_we_n = ~wmask[1];
end
2: begin
s_din = din[23:16];
sram_we_n = ~wmask[2];
end
3: begin
s_din = din[31:24];
sram_we_n = ~wmask[3];
end
endcase
end
// NOTE(review): the case items from here down to the `3:` item reference
// STATE/DLY/RET/sys_* names rather than status/s_byte, and `1:` duplicates a
// label already used above — this looks like text from the other version of
// the module; confirm against the real file.
1: begin
if(DLY == 3) sys_wr_data_valid <= 1'b0;
if(DLY == 0) STATE <= RET; // NOP for DLY clocks, return to RET state
end
5: begin // read/write
RET <= 7;
if(sys_cmd_ack[1]) begin // read
STATE <= 7;
end else begin // write
DLY <= 1;
sys_wr_data_valid <= 1'b1;
end
end
7: begin // init read/write phase
if(sys_cmd_ack[1]) begin
sys_rd_data_valid <= 1'b1;
end
RET <= 0;
DLY <= sys_cmd_ack[1] ? 128 - 2 : 128 - 1;
end
// State 3 (write, second half): release #WE and step to the next byte.
// NOTE(review): #WE is released on the same edge that sram_a advances; confirm
// the SRAM's address/data hold-after-write requirement is met given output skew.
// NOTE(review): s_byte is tested before its non-blocking increment takes effect,
// so the loop only exits when s_byte is already 4; state 2 then runs once with
// s_byte == 4 (no case item matches, #WE stays high), costing two extra cycles
// and leaving sram_a at base+5. Confirm whether `== 3` was intended.
3: begin
sram_we_n = 1;
sram_a <= sram_a + 1;
s_byte <= s_byte + 1;
if (s_byte == 4) begin
s_mreq = 0;
status <= 0;
end
else status <= 2;
end
endcase
end
endmodule

endmodule
119 changes: 40 additions & 79 deletions HW/system_2MB.v
Expand Up @@ -155,17 +155,14 @@ module system_2MB
inout PS2_DATA2,
output wire [1:0] monochrome_switcher

);

wire [15:0]sys_DIN;
wire [15:0]sys_DOUT;
wire sys_rd_data_valid;
wire sys_wr_data_valid;
);

wire [12:0]waddr;
wire [31:0] DOUT;
wire [15:0]CPU_DOUT;
wire [15:0]PORT_ADDR;
wire [31:0] DRAM_dout;
wire [15:0]PORT_ADDR;
wire [31:0] SRAM_dout;

wire [20:0] ADDR;
wire IORQ;
wire WR;
Expand Down Expand Up @@ -212,9 +209,7 @@ module system_2MB
wire [1:0] cpu_speed_switcher;
reg cpu_speed_max = 0;

reg [1:0]command = 0;
reg [1:0]s_ddr_rd = 0;
reg [1:0]s_ddr_wr = 0;
reg [1:0]command = 0;

reg s_RS232_DCE_RXD;
reg s_RS232_HOST_RXD;
Expand All @@ -224,8 +219,7 @@ module system_2MB
reg [15:0]RTC = 0;
reg [15:0]RTCSET = 0;
wire RTCEND = RTC == RTCSET;
wire RTCDIVEND = RTCDIV25 == 24;
reg [12:0]cache_hi_addr;
wire RTCDIVEND = RTCDIV25 == 24;
wire [7:0]memmap;
wire [6:0]memmap_mux;
wire f_map_to_f;
Expand All @@ -236,9 +230,7 @@ module system_2MB
reg [1:0] speaker_on = 0;
reg [9:0]rNMI = 0;

reg [2:0] div_clk_cpu = 3'd0;
reg [18:0]sysaddr;
reg [2:0]auto_flush = 3'b000;
reg [2:0] div_clk_cpu = 3'd0;
wire clk_cpu;


Expand Down Expand Up @@ -341,39 +333,39 @@ module system_2MB
);

defparam cga1.BLINK_MAX = 24'd4772727;

SRAM_8bit SRAM
(
.sys_CLK(clk_sdr), // clock
.sys_CMD(command), // 00=nop, 01 = write 256 bytes, 11=read 256 bytes
.sys_ADDR(sysaddr), // byte address
.sys_DIN(sys_DIN), // data input
.sys_DOUT(sys_DOUT), // data output
.sys_rd_data_valid(sys_rd_data_valid), // data valid read
.sys_wr_data_valid(sys_wr_data_valid), // data valid write

.sram_clk(clk_sram),
.sram_n_WE(SRAM_WE_n), // SRAM #WE
.sram_ADDR(SRAM_ADDR), // SRAM address
.sram_DATA(SRAM_DATA) // SRAM data
);

parameter crtc_addr = 6'b010111; // B8000 (32 KB)
parameter bios_addr_fe000 = 8'b01111111; // FE000 (2 KB)

// Address decode and CPU read-data mux.
// NOTE(review): CRTCVRAM, BIOSROM and CPU_DIN are each declared twice in this
// listing; the second set (gated with MREQ, using SRAM_* names) appears to be
// the replacement for the first (CACHE_* names) — confirm against the real file.
wire MREQ;
wire CRTCVRAM = (ADDR[20:15] == crtc_addr);
wire BIOSROM = ((ADDR[20:13] == bios_addr_fe000) & f_map_to_f);

wire CACHE_EN = ~(CRTCVRAM | BIOSROM);
wire CACHE_MREQ = MREQ & CACHE_EN;
// Video-RAM and BIOS-ROM selects, here additionally qualified by MREQ.
wire CRTCVRAM = MREQ & (ADDR[20:15] == crtc_addr);
wire BIOSROM = MREQ & ((ADDR[20:13] == bios_addr_fe000) & f_map_to_f);
// Any memory request that hits neither video RAM nor BIOS ROM goes to the SRAM.
wire SRAM_EN = ~(CRTCVRAM | BIOSROM);
wire SRAM_MREQ = MREQ & SRAM_EN; 

wire [31:0] vram_dout;
wire [31:0] bios_dout;
wire [31:0] CPU_DIN;
reg s_cache_mreq;

assign CPU_DIN = s_cache_mreq ? DRAM_dout : CRTCVRAM ? vram_dout : bios_dout;
wire [31:0] CPU_DIN; 
reg s_sram_mreq;
reg s_mreq;

// Read-data priority: SRAM, then (if a request was registered) video RAM or
// BIOS, else all ones.
// NOTE(review): s_sram_mreq and s_mreq are registered copies of SRAM_MREQ/MREQ
// (assigned on posedge clk_cpu further down), whereas CRTCVRAM is combinational
// and now includes MREQ. If MREQ is low in the cycle where s_mreq is high,
// CRTCVRAM is 0 and bios_dout is selected — confirm video-RAM reads still
// return vram_dout.
assign CPU_DIN = s_sram_mreq ? SRAM_dout : s_mreq ? CRTCVRAM ? vram_dout : bios_dout : 32'hFFFFFFFF;

// 8-bit SRAM controller instance, clocked by clk_cpu. The 21-bit address is
// the 7-bit memmap_mux page concatenated with ADDR[13:0].
SRAM_8bit SRAM
(
.addr({memmap_mux, ADDR[13:0]}), // [20:0] -> byte-level addressing within the 2 MB SRAM
.dout(SRAM_dout), // OUT [31:0] -> DWORD read from the SRAM
.din(DOUT), // IN [31:0] -> DWORD to write to the SRAM
.clk(clk_cpu), // IN -> CPU clock
.mreq(SRAM_MREQ), // IN -> read/write request
.wmask(RAM_WMASK), // IN [3:0] -> selects which byte(s) must be written
.ce(CE), // OUT -> tells the Next186 CPU module to wait until read and/or write operations finish
// Physical SRAM pins
.sram_we_n(SRAM_WE_n), // OUT -> SRAM #WE (1 bit)
.sram_a(SRAM_ADDR), // OUT -> SRAM address [20:0]
.sram_d(SRAM_DATA) // IN/OUT -> SRAM data [7:0]
);

BRAM_8KB_BIOS BIOS
(
Expand Down Expand Up @@ -421,27 +413,7 @@ module system_2MB
else if(!rNMI[9] && RTCDIVEND) rNMI <= rNMI + 1'b1; // 1Mhz increment

end


cache_controller cache_ctl
(
.addr(ADDR),
.dout(DRAM_dout),
.din(DOUT),
.clk(clk_cpu),
.mreq(CACHE_MREQ),
.wmask(RAM_WMASK),
.ce(CE),
.ddr_din(sys_DOUT),
.ddr_dout(sys_DIN),
.ddr_clk(clk_sdr),
.ddr_rd(ddr_rd),
.ddr_wr(ddr_wr),
.waddr(waddr),
.cache_write_data(sys_rd_data_valid), // read SRAM, write to cache
.cache_read_data(sys_wr_data_valid)
//.flush(auto_flush == 3'b101)
);
end

wire I_KB;
wire I_MOUSE;
Expand Down Expand Up @@ -470,6 +442,7 @@ module system_2MB
wire INT;
wire timer_int;
wire I_COM1;

PIC_8259 PIC
(
.RST(!rstcount[4]),
Expand Down Expand Up @@ -519,7 +492,7 @@ module system_2MB
.cpuaddr(PORT_ADDR[3:0]),
.cpurdata(memmap),
.cpuwdata(CPU_DOUT[7:0]),
.memaddr(cache_hi_addr[11:6]), // A20 disabled (PCXT)
.memaddr(ADDR[19:14]), // A20 disabled (PCXT)
.memdata(memmap_mux),
.WE(MEMORY_MAP & WR & WORD & IORQ & CPU_CE),
.WE_EMS(EMS_OE & WR & IORQ & CPU_CE),
Expand Down Expand Up @@ -554,20 +527,10 @@ module system_2MB
.snd(opl2snd)
//.irq_n()
);

always @ (posedge clk_sdr) begin
s_ddr_rd <= {s_ddr_rd[0], ddr_rd};
s_ddr_wr <= {s_ddr_wr[0], ddr_wr};
cache_hi_addr <= s_ddr_wr[0] ? waddr : ADDR[20:8];
sysaddr <= {memmap_mux, cache_hi_addr[5:0], 6'b000000};

if(s_ddr_wr[1]) command <= 2'b01; // write 256 bytes cache
else if(s_ddr_rd[1]) command <= 2'b11; // read 256 bytes cache
else command <= 2'b00;
end

always @ (posedge clk_cpu) begin
s_cache_mreq <= CACHE_MREQ;
always @ (posedge clk_cpu) begin
s_sram_mreq <= SRAM_MREQ;
s_mreq <= MREQ;
if(IORQ & CPU_CE) begin
if(WR & SPEAKER_PORT) speaker_on <= CPU_DOUT[1:0];
end
Expand Down Expand Up @@ -603,9 +566,7 @@ module system_2MB
end else if(RTCSYNC == 2'b01) begin
if(RTCEND) RTC <= 0;
else RTC <= RTC + 1;
end

//auto_flush[1:0] <= {auto_flush[0], vblnk};
end
end

assign AUD_L = sndsign;
Expand Down
Binary file added sram_reading.pdf
Binary file not shown.
Binary file added sram_writing.pdf
Binary file not shown.

8 comments on commit a7f1293

@gyurco
Copy link

@gyurco gyurco commented on a7f1293 Jan 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, but the cache/memory architecture of this core is mostly a black box for me. And it doesn't help that the CPU's bus is far from the original, tightly coupled with the cache, and even with the display RAM access.

@spark2k06
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, but the cache/memory architecture of this core is mostly a black box for me. And it doesn't help that the CPU's bus is far from the original, tightly coupled with the cache, and even with the display RAM access.

Don't worry, it had to be tried ;-), I'll keep thinking how to solve it. Thanks!

@spark2k06
Copy link
Owner Author

@spark2k06 spark2k06 commented on a7f1293 Jan 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is true that the BIU module is very difficult to digest. Even something that could be simpler, such as removing or simply reducing the 16-byte instruction cache/queue, something I'm also interested in... I can't get it.

Can you think of any way to do the latter that I propose? At least reduce the instruction queue? For me it would be a step forward.

@gyurco
Copy link

@gyurco gyurco commented on a7f1293 Jan 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you want to remove the cache at all? The CPU won't be cycle exact anyway. If you want that, maybe you should use a different CPU, like https://github.com/MicroCoreLabs/Projects/tree/master/MCL86 (used in the Q*bert core already, so it works).
With the cache, you can throttle the CPU in case of cache miss with the CE signal, as I did:
https://github.com/gyurco/Next186/blob/master/rtl/cache_controller.sv#L190

@spark2k06
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't rule out the use of MCL86, in fact I already thought about it because the truth is that for my purpose, Next186 is turning out to be very unstable.

It is not an easy task, since I would like to take advantage of the rest of the Next186 modules, such as timer, etc... but in the end, I will have no other option.

Another project that I had in mind that could perhaps be used is the Zet processor:

https://github.com/marmolejo/zet

@gyurco
Copy link

@gyurco gyurco commented on a7f1293 Jan 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the existing PIT/PIC/etc. modules can be used with another CPU, too, since their bus interface are ordinary and universal.

@gyurco
Copy link

@gyurco gyurco commented on a7f1293 Mar 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Meanwhile I've added the basic CGA modes to the original fork. Now BIOS text and pixel draw functions must be implemented, as it's used by some games (BIOS text drawing even used in VGA games).

@spark2k06
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have seen it. That's great!

Please sign in to comment.