/
neorv32_cpu_control.vhd
2664 lines (2365 loc) · 167 KB
/
neorv32_cpu_control.vhd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
-- #################################################################################################
-- # << NEORV32 - CPU Operations Control Unit >> #
-- # ********************************************************************************************* #
-- # CPU operations are controlled by several "engines" (modules). These engines operate in #
-- # parallel to implement a simple pipeline: #
-- # + Fetch engine: Fetches 32-bit chunks of instruction words #
-- # + Issue engine: Decodes compressed instructions, aligns and queues instruction words #
-- # + Execute engine: Multi-cycle execution of instructions (generate control signals) #
-- # + Trap controller: Handles interrupts and exceptions #
-- # + CSR module: Read/write access to control and status registers #
-- # + Debug module: CPU debug mode handling (on-chip debugger) #
-- # + Trigger module: Hardware-assisted breakpoints (on-chip debugger) #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
-- # #
-- # Copyright (c) 2022, Stephan Nolting. All rights reserved. #
-- # #
-- # Redistribution and use in source and binary forms, with or without modification, are #
-- # permitted provided that the following conditions are met: #
-- # #
-- # 1. Redistributions of source code must retain the above copyright notice, this list of #
-- # conditions and the following disclaimer. #
-- # #
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of #
-- # conditions and the following disclaimer in the documentation and/or other materials #
-- # provided with the distribution. #
-- # #
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to #
-- # endorse or promote products derived from this software without specific prior written #
-- # permission. #
-- # #
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS #
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF #
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE #
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED #
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED #
-- # OF THE POSSIBILITY OF SUCH DAMAGE. #
-- # ********************************************************************************************* #
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting #
-- #################################################################################################
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library neorv32;
use neorv32.neorv32_package.all;
-- Entity of the CPU operations control unit.
-- Generics: data path width, boot/debug addresses, the set of implemented RISC-V extensions,
-- tuning options and the PMP/HPM configuration.
-- Ports: main control bus output, instruction fetch bus, ALU/bus status inputs, register file
-- and ALU data inputs, immediate/PC/CSR data outputs, interrupt and debug-halt requests,
-- PMP configuration outputs and the data-access exception flags.
entity neorv32_cpu_control is
generic (
-- General --
XLEN : natural; -- data path width
HW_THREAD_ID : natural; -- hardware thread id (32-bit)
CPU_BOOT_ADDR : std_ulogic_vector(31 downto 0); -- cpu boot address
CPU_DEBUG_PARK_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug mode parking loop entry address
CPU_DEBUG_EXC_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug mode exception entry address
-- RISC-V CPU Extensions --
CPU_EXTENSION_RISCV_B : boolean; -- implement bit-manipulation extension?
CPU_EXTENSION_RISCV_C : boolean; -- implement compressed extension?
CPU_EXTENSION_RISCV_E : boolean; -- implement embedded RF extension?
CPU_EXTENSION_RISCV_M : boolean; -- implement mul/div extension?
CPU_EXTENSION_RISCV_U : boolean; -- implement user mode extension?
CPU_EXTENSION_RISCV_Zfinx : boolean; -- implement 32-bit floating-point extension (using INT reg!)
CPU_EXTENSION_RISCV_Zicsr : boolean; -- implement CSR system?
CPU_EXTENSION_RISCV_Zicntr : boolean; -- implement base counters?
CPU_EXTENSION_RISCV_Zihpm : boolean; -- implement hardware performance monitors?
CPU_EXTENSION_RISCV_Zifencei : boolean; -- implement instruction stream sync.?
CPU_EXTENSION_RISCV_Zmmul : boolean; -- implement multiply-only M sub-extension?
CPU_EXTENSION_RISCV_Zxcfu : boolean; -- implement custom (instr.) functions unit?
CPU_EXTENSION_RISCV_DEBUG : boolean; -- implement CPU debug mode?
-- Tuning Options --
FAST_MUL_EN : boolean; -- use DSPs for M extension's multiplier
FAST_SHIFT_EN : boolean; -- use barrel shifter for shift operations
CPU_IPB_ENTRIES : natural; -- entries in instruction prefetch buffer, has to be a power of 2, min 1
-- Physical memory protection (PMP) --
PMP_NUM_REGIONS : natural; -- number of regions (0..16)
PMP_MIN_GRANULARITY : natural; -- minimum region granularity in bytes, has to be a power of 2, min 4 bytes
-- Hardware Performance Monitors (HPM) --
HPM_NUM_CNTS : natural; -- number of implemented HPM counters (0..29)
HPM_CNT_WIDTH : natural -- total size of HPM counters (0..64)
);
port (
-- global control --
clk_i : in std_ulogic; -- global clock, rising edge
rstn_i : in std_ulogic; -- global reset, low-active, async
ctrl_o : out std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus
-- instruction fetch interface --
i_bus_addr_o : out std_ulogic_vector(XLEN-1 downto 0); -- bus access address
i_bus_rdata_i : in std_ulogic_vector(31 downto 0); -- bus read data
i_bus_re_o : out std_ulogic; -- read enable
i_bus_ack_i : in std_ulogic; -- bus transfer acknowledge
i_bus_err_i : in std_ulogic; -- bus transfer error
i_pmp_fault_i : in std_ulogic; -- instruction fetch pmp fault
-- status input --
alu_idone_i : in std_ulogic; -- ALU iterative operation done
bus_d_wait_i : in std_ulogic; -- wait for bus
-- data input --
cmp_i : in std_ulogic_vector(1 downto 0); -- comparator status
alu_add_i : in std_ulogic_vector(XLEN-1 downto 0); -- ALU address result
rs1_i : in std_ulogic_vector(XLEN-1 downto 0); -- rf source 1
-- data output --
imm_o : out std_ulogic_vector(XLEN-1 downto 0); -- immediate
curr_pc_o : out std_ulogic_vector(XLEN-1 downto 0); -- current PC (corresponding to current instruction)
next_pc_o : out std_ulogic_vector(XLEN-1 downto 0); -- next PC (corresponding to next instruction)
csr_rdata_o : out std_ulogic_vector(XLEN-1 downto 0); -- CSR read data
-- FPU interface --
fpu_flags_i : in std_ulogic_vector(4 downto 0); -- exception flags
-- debug mode (halt) request --
db_halt_req_i : in std_ulogic; -- debug mode (halt) request
-- interrupts (risc-v compliant) --
msw_irq_i : in std_ulogic; -- machine software interrupt
mext_irq_i : in std_ulogic; -- machine external interrupt
mtime_irq_i : in std_ulogic; -- machine timer interrupt
-- fast interrupts (custom) --
firq_i : in std_ulogic_vector(15 downto 0); -- 16 fast interrupt request lines
-- system time input from MTIME --
time_i : in std_ulogic_vector(63 downto 0); -- current system time
-- physical memory protection --
pmp_addr_o : out pmp_addr_if_t; -- addresses
pmp_ctrl_o : out pmp_ctrl_if_t; -- configs
-- bus access exceptions --
mar_i : in std_ulogic_vector(XLEN-1 downto 0); -- memory address register
ma_load_i : in std_ulogic; -- misaligned load data address
ma_store_i : in std_ulogic; -- misaligned store data address
be_load_i : in std_ulogic; -- bus error on load data access
be_store_i : in std_ulogic -- bus error on store data access
);
end neorv32_cpu_control;
architecture neorv32_cpu_control_rtl of neorv32_cpu_control is
-- HPM counter width - high/low parts --
-- Splits the configured HPM_CNT_WIDTH into the width of the low word (lo) and the width of
-- the high word (hi).
-- NOTE(review): assumes cond_sel_int_f(cond, a, b) returns a when cond is true and b otherwise
-- (declared in neorv32_package, not visible here). Under that assumption:
-- lo = HPM_CNT_WIDTH capped at 32, hi = number of bits above 32 (0 if there are none).
constant hpm_cnt_lo_width_c : natural := natural(cond_sel_int_f(boolean(HPM_CNT_WIDTH < 32), HPM_CNT_WIDTH, 32));
constant hpm_cnt_hi_width_c : natural := natural(cond_sel_int_f(boolean(HPM_CNT_WIDTH > 32), HPM_CNT_WIDTH-32, 0));
-- instruction fetch engine --
-- State type and register bundle used by the fetch_engine_fsm process.
type fetch_engine_state_t is (IF_RESTART, IF_REQUEST, IF_PENDING, IF_WAIT); -- better use one-hot encoding
type fetch_engine_t is record
state : fetch_engine_state_t; -- current FSM state (IF_RESTART after reset)
state_prev : fetch_engine_state_t; -- state of the previous clock cycle (for HPM)
restart : std_ulogic; -- set by reset in order to reset the IPB
unaligned : std_ulogic; -- cleared by reset: fetching always starts at an aligned address
pc : std_ulogic_vector(XLEN-1 downto 0); -- cleared by reset; presumably the current fetch address - TODO confirm
reset : std_ulogic; -- NOTE(review): driver not visible in this part of the file
resp : std_ulogic; -- bus response
a_err : std_ulogic; -- alignment error
pmp_err : std_ulogic; -- PMP error (cleared by reset)
end record;
signal fetch_engine : fetch_engine_t;
-- instruction prefetch buffer (FIFO) interface --
-- Every element of the interface exists twice (index 0 and 1).
-- NOTE(review): presumably one FIFO per 16-bit half of a fetched 32-bit word - confirm
-- against the FIFO instantiation, which is not visible here.
type ipb_data_t is array (0 to 1) of std_ulogic_vector((2+16)-1 downto 0); -- status (bus_error, align_error) + 16-bit instruction
type ipb_t is record
wdata : ipb_data_t; -- write data (2 status bits + 16 instruction bits per entry)
we : std_ulogic_vector(1 downto 0); -- trigger write
free : std_ulogic_vector(1 downto 0); -- free entry available?
rdata : ipb_data_t; -- read data (same layout as wdata)
re : std_ulogic_vector(1 downto 0); -- read enable
avail : std_ulogic_vector(1 downto 0); -- data available?
end record;
signal ipb : ipb_t;
-- instruction issue engine --
-- Signal bundle of the issue engine (decodes compressed instructions, aligns and queues
-- instruction words - see file header).
-- NOTE(review): the hedged field descriptions below are inferred from names and widths only;
-- the logic driving them is not visible in this part of the file.
type issue_engine_t is record
align : std_ulogic; -- presumably the current alignment flag - TODO confirm
align_set : std_ulogic; -- presumably a request to set "align" - TODO confirm
align_clr : std_ulogic; -- presumably a request to clear "align" - TODO confirm
ci_i16 : std_ulogic_vector(15 downto 0); -- 16-bit word; presumably the compressed instruction to decode
ci_i32 : std_ulogic_vector(31 downto 0); -- 32-bit word; presumably the de-compressed instruction
ci_ill : std_ulogic; -- presumably flags an illegal compressed instruction
data : std_ulogic_vector((4+32)-1 downto 0); -- 4-bit status + 32-bit instruction
valid : std_ulogic_vector(1 downto 0); -- data word is valid when != 0
end record;
signal issue_engine : issue_engine_t;
-- instruction decoding helper logic --
-- Bundle of single-bit decode flags.
-- NOTE(review): the per-field descriptions are inferred from the field names; the process
-- that assigns them is not visible in this part of the file - confirm before relying on them.
type decode_aux_t is record
is_f_op : std_ulogic; -- presumably: floating-point operation
is_m_mul : std_ulogic; -- presumably: multiplication operation
is_m_div : std_ulogic; -- presumably: division operation
is_b_imm : std_ulogic; -- presumably: bit-manipulation operation with immediate operand
is_b_reg : std_ulogic; -- presumably: bit-manipulation operation with register operand
rs1_zero : std_ulogic; -- presumably: rs1 field addresses x0
rd_zero : std_ulogic; -- presumably: rd field addresses x0
end record;
signal decode_aux : decode_aux_t;
-- instruction execution engine --
-- make sure reset state is the first item in the list (discussion #415)
-- (BRANCHED is the first literal of the enumeration below)
type execute_engine_state_t is (BRANCHED, DISPATCH, TRAP_ENTER, TRAP_START, TRAP_EXIT, TRAP_EXECUTE,
EXECUTE, ALU_WAIT, BRANCH, SYSTEM, MEM_REQ, MEM_WAIT);
-- Register bundle of the execute engine; "*_nxt" elements carry the next value of the
-- element with the same base name.
type execute_engine_t is record
state : execute_engine_state_t; -- current FSM state
state_nxt : execute_engine_state_t; -- next FSM state
state_prev : execute_engine_state_t; -- presumably the state one cycle ago - TODO confirm
state_prev2 : execute_engine_state_t; -- presumably the state two cycles ago - TODO confirm
--
i_reg : std_ulogic_vector(31 downto 0); -- 32-bit register; presumably the current instruction word
i_reg_nxt : std_ulogic_vector(31 downto 0);
--
is_ci : std_ulogic; -- current instruction is de-compressed instruction
is_ci_nxt : std_ulogic;
is_ici : std_ulogic; -- current instruction is illegal de-compressed instruction
is_ici_nxt : std_ulogic;
--
branch_taken : std_ulogic; -- branch condition fulfilled
pc : std_ulogic_vector(XLEN-1 downto 0); -- actual PC, corresponding to current executed instruction
pc_mux_sel : std_ulogic; -- source select for PC update
pc_we : std_ulogic; -- PC update enabled
next_pc : std_ulogic_vector(XLEN-1 downto 0); -- next PC, corresponding to next instruction to be executed
next_pc_inc : std_ulogic_vector(XLEN-1 downto 0); -- increment to get next PC
pc_last : std_ulogic_vector(XLEN-1 downto 0); -- PC of last executed instruction
--
sleep : std_ulogic; -- CPU in sleep mode
sleep_nxt : std_ulogic;
branched : std_ulogic; -- instruction fetch was reset
branched_nxt : std_ulogic;
end record;
signal execute_engine : execute_engine_t;
-- trap controller --
-- Signal bundle of the trap controller (handles interrupts and exceptions - see file header):
-- pending-source buffers, the selected trap cause, handler start/end handshakes and the
-- individual synchronous exception sources.
type trap_ctrl_t is record
exc_buf : std_ulogic_vector(exc_width_c-1 downto 0); -- synchronous exception buffer (one bit per exception)
exc_fire : std_ulogic; -- set if there is a valid source in the exception buffer
irq_buf : std_ulogic_vector(irq_width_c-1 downto 0); -- asynchronous exception/interrupt buffer (one bit per interrupt source)
irq_fire : std_ulogic; -- set if there is a valid source in the interrupt buffer
cause : std_ulogic_vector(6 downto 0); -- trap ID for mcause CSR + debug-mode entry identifier
cause_nxt : std_ulogic_vector(6 downto 0); -- next value of "cause"
epc : std_ulogic_vector(XLEN-1 downto 0); -- exception program counter
db_irq_fire : std_ulogic; -- set if there is a valid IRQ source in the "enter debug mode" trap buffer
db_irq_en : std_ulogic; -- set if IRQs are allowed in debug mode
--
env_start : std_ulogic; -- start trap handler env
env_start_ack : std_ulogic; -- start of trap handler acknowledged
env_end : std_ulogic; -- end trap handler env
--
instr_be : std_ulogic; -- instruction fetch bus error
instr_ma : std_ulogic; -- instruction fetch misaligned address
instr_il : std_ulogic; -- illegal instruction
env_call : std_ulogic; -- ecall instruction
break_point : std_ulogic; -- ebreak instruction
end record;
signal trap_ctrl : trap_ctrl_t;
-- CPU main control bus --
-- Same width as the ctrl_o port (ctrl_width_c bits).
-- NOTE(review): presumably ctrl_nxt is the next-cycle value and ctrl the registered copy that
-- drives ctrl_o; the assignments are not visible in this part of the file - confirm.
signal ctrl_nxt, ctrl : std_ulogic_vector(ctrl_width_c-1 downto 0);
-- RISC-V control and status registers (CSRs) --
-- Helper array types: the arrays sized by PMP_NUM_REGIONS / HPM_NUM_CNTS hold the implemented
-- registers, the fixed-size "*_rd_t" arrays are used for the read-back data elements of csr_t.
type pmpcfg_t is array (0 to PMP_NUM_REGIONS-1) of std_ulogic_vector(7 downto 0); -- one 8-bit configuration per region
type pmpcfg_rd_t is array (0 to 3) of std_ulogic_vector(31 downto 0); -- four 32-bit read-back words
type pmpaddr_t is array (0 to PMP_NUM_REGIONS-1) of std_ulogic_vector(XLEN-3 downto index_size_f(PMP_MIN_GRANULARITY)-2); -- lower bound depends on PMP_MIN_GRANULARITY
type mhpmevent_t is array (0 to HPM_NUM_CNTS-1) of std_ulogic_vector(hpmcnt_event_size_c-1 downto 0); -- one event selector per counter
type mhpmevent_rd_t is array (0 to 28) of std_ulogic_vector(XLEN-1 downto 0); -- 29 read-back words
type mhpmcnt_t is array (0 to HPM_NUM_CNTS-1) of std_ulogic_vector(XLEN-1 downto 0); -- one XLEN-bit word per counter
type mhpmcnt_nxt_t is array (0 to HPM_NUM_CNTS-1) of std_ulogic_vector(XLEN downto 0); -- one bit wider than mhpmcnt_t
type mhpmcnt_ovfl_t is array (0 to HPM_NUM_CNTS-1) of std_ulogic_vector(0 downto 0); -- single-bit vector per counter
-- NOTE(review): 30 entries here, but mhpmevent_rd_t has 29 (0 to 28), hpm_we_t is 29 bits wide
-- and HPM_NUM_CNTS is documented as 0..29 - check whether index 29 is ever used.
type mhpmcnt_rd_t is array (0 to 29) of std_ulogic_vector(XLEN-1 downto 0);
-- All CSR-related state: the access interface (addr/we/re/wdata/rdata) followed by the
-- individual register bits and words, grouped by CSR.
type csr_t is record
addr : std_ulogic_vector(11 downto 0); -- csr address
we : std_ulogic; -- csr write enable
we_nxt : std_ulogic;
re : std_ulogic; -- csr read enable
re_nxt : std_ulogic;
wdata : std_ulogic_vector(XLEN-1 downto 0); -- csr write data
rdata : std_ulogic_vector(XLEN-1 downto 0); -- csr read data
--
mstatus_mie : std_ulogic; -- mstatus.MIE: global IRQ enable (R/W)
mstatus_mpie : std_ulogic; -- mstatus.MPIE: previous global IRQ enable (R/W)
mstatus_mpp : std_ulogic; -- mstatus.MPP: machine previous privilege mode
mstatus_mprv : std_ulogic; -- mstatus.MPRV: effective privilege level for machine-mode load/stores
mstatus_tw : std_ulogic; -- mstatus.TW: do not allow user mode to execute WFI instruction when set
--
mie_msi : std_ulogic; -- mie.MSIE: machine software interrupt enable (R/W)
mie_mei : std_ulogic; -- mie.MEIE: machine external interrupt enable (R/W)
mie_mti : std_ulogic; -- mie.MTIE: machine timer interrupt enable (R/W)
mie_firq : std_ulogic_vector(15 downto 0); -- mie.firq*e: fast interrupt enabled (R/W)
--
mip_firq_nclr : std_ulogic_vector(15 downto 0); -- clear pending FIRQ (active-low)
--
mcounteren_cy : std_ulogic; -- mcounteren.cy: allow cycle[h] access from user-mode
mcounteren_tm : std_ulogic; -- mcounteren.tm: allow time[h] access from user-mode
mcounteren_ir : std_ulogic; -- mcounteren.ir: allow instret[h] access from user-mode
--
mcountinhibit_cy : std_ulogic; -- mcountinhibit.cy: inhibit auto-increment for [m]cycle[h]
mcountinhibit_ir : std_ulogic; -- mcountinhibit.ir: inhibit auto-increment for [m]instret[h]
mcountinhibit_hpm : std_ulogic_vector(HPM_NUM_CNTS-1 downto 0); -- mcountinhibit.hpm*: inhibit auto-increment for mhpmcounter*[h] (one bit per implemented counter)
--
privilege : std_ulogic; -- current privilege mode
privilege_eff : std_ulogic; -- current *effective* privilege mode
--
mepc : std_ulogic_vector(XLEN-1 downto 0); -- mepc: machine exception pc (R/W)
mcause : std_ulogic_vector(5 downto 0); -- mcause: machine trap cause (R/W)
mtvec : std_ulogic_vector(XLEN-1 downto 0); -- mtvec: machine trap-handler base address (R/W), bit 1:0 == 00
mtval : std_ulogic_vector(XLEN-1 downto 0); -- mtval: machine bad address or instruction (R/W)
--
mhpmevent : mhpmevent_t; -- mhpmevent*: machine performance-monitoring event selector (R/W)
mhpmevent_rd : mhpmevent_rd_t; -- read data
--
mscratch : std_ulogic_vector(XLEN-1 downto 0); -- mscratch: scratch register (R/W)
--
mcycle : std_ulogic_vector(XLEN-1 downto 0); -- mcycle (R/W)
mcycle_nxt : std_ulogic_vector(XLEN downto 0); -- one bit wider than mcycle (presumably the carry-out)
mcycle_ovfl : std_ulogic_vector(0 downto 0); -- counter low-to-high-word overflow
mcycleh : std_ulogic_vector(XLEN-1 downto 0); -- mcycleh (R/W)
minstret : std_ulogic_vector(XLEN-1 downto 0); -- minstret (R/W)
minstret_nxt : std_ulogic_vector(XLEN downto 0); -- one bit wider than minstret (presumably the carry-out)
minstret_ovfl : std_ulogic_vector(0 downto 0); -- counter low-to-high-word overflow
minstreth : std_ulogic_vector(XLEN-1 downto 0); -- minstreth (R/W)
--
mhpmcounter : mhpmcnt_t; -- mhpmcounter* (R/W)
mhpmcounter_nxt : mhpmcnt_nxt_t; -- one bit wider than mhpmcounter (presumably the carry-out)
mhpmcounter_ovfl : mhpmcnt_ovfl_t; -- counter low-to-high-word overflow
mhpmcounterh : mhpmcnt_t; -- mhpmcounter*h (R/W)
mhpmcounter_rd : mhpmcnt_rd_t; -- mhpmcounter* (R/W): actual read data
mhpmcounterh_rd : mhpmcnt_rd_t; -- mhpmcounter*h (R/W): actual read data
--
pmpcfg : pmpcfg_t; -- physical memory protection - configuration registers
pmpcfg_rd : pmpcfg_rd_t; -- physical memory protection - configuration read-back
pmpaddr : pmpaddr_t; -- physical memory protection - address registers (bits 33:2 of PHYSICAL address)
--
frm : std_ulogic_vector(2 downto 0); -- frm (R/W): FPU rounding mode
fflags : std_ulogic_vector(4 downto 0); -- fflags (R/W): FPU exception flags
--
dcsr_ebreakm : std_ulogic; -- dcsr.ebreakm (R/W): behavior of ebreak instruction on m-mode
dcsr_ebreaku : std_ulogic; -- dcsr.ebreaku (R/W): behavior of ebreak instruction on u-mode
dcsr_step : std_ulogic; -- dcsr.step (R/W): single-step mode
dcsr_prv : std_ulogic; -- dcsr.prv (R/W): current privilege level when entering debug mode
dcsr_cause : std_ulogic_vector(2 downto 0); -- dcsr.cause (R/-): why was debug mode entered
dcsr_rd : std_ulogic_vector(XLEN-1 downto 0); -- dcsr (R/(W)): debug mode control and status register
dpc : std_ulogic_vector(XLEN-1 downto 0); -- dpc (R/W): debug mode program counter
dscratch0 : std_ulogic_vector(XLEN-1 downto 0); -- dscratch0 (R/W): debug mode scratch register 0
--
tdata1_exe : std_ulogic; -- enable (match) trigger
tdata1_rd : std_ulogic_vector(XLEN-1 downto 0); -- tdata1 (R/(W)): trigger register read-back
tdata2 : std_ulogic_vector(XLEN-1 downto 0); -- tdata2 (R/W): address-match register
end record;
signal csr : csr_t;
-- counter CSRs write access --
-- Write data plus write-enable vectors for the cycle, instret and HPM counter CSRs.
-- NOTE(review): the two-element dimension (cycle/instret bits 1:0, hpm index 0 to 1) presumably
-- selects the low word and the high ("h") word of a counter - the decoding logic is not visible
-- in this part of the file, confirm the index order there.
type hpm_we_t is array (0 to 1) of std_ulogic_vector(28 downto 0); -- 2 x 29 write-enable bits
type cnt_csr_we_t is record
wdata : std_ulogic_vector(XLEN-1 downto 0); -- write data
cycle : std_ulogic_vector(1 downto 0); -- write enables for the cycle counter
instret : std_ulogic_vector(1 downto 0); -- write enables for the instret counter
hpm : hpm_we_t; -- write enables for the HPM counters
end record;
signal cnt_csr_we : cnt_csr_we_t;
-- debug mode controller --
-- FSM state type and signal bundle of the debug module (CPU debug mode handling for the
-- on-chip debugger - see file header). The trig_* elements are the individual triggers
-- listed below.
type debug_ctrl_state_t is (DEBUG_OFFLINE, DEBUG_PENDING, DEBUG_ONLINE, DEBUG_LEAVING);
type debug_ctrl_t is record
state : debug_ctrl_state_t; -- current state of the debug controller FSM
running : std_ulogic; -- CPU is in debug mode
trig_hw : std_ulogic; -- hardware trigger
trig_break : std_ulogic; -- ebreak instruction trigger
trig_halt : std_ulogic; -- external request trigger
trig_step : std_ulogic; -- single-stepping mode trigger
dret : std_ulogic; -- executed DRET instruction
ext_halt_req : std_ulogic; -- external halt request buffer
end record;
signal debug_ctrl : debug_ctrl_t;
-- (hpm) counter events --
-- cnt_event: one bit per counter event (width given by hpmcnt_event_size_c)
signal cnt_event : std_ulogic_vector(hpmcnt_event_size_c-1 downto 0);
-- hpmcnt_trigger: one bit per implemented HPM counter (HPM_NUM_CNTS)
signal hpmcnt_trigger : std_ulogic_vector(HPM_NUM_CNTS-1 downto 0);
-- illegal instruction check --
signal illegal_cmd : std_ulogic; -- NOTE(review): presumably set for an illegal instruction word - driven outside this section
signal illegal_reg : std_ulogic; -- illegal register (>x15) - E-extension
-- CSR access/privilege and r/w check --
signal csr_acc_valid : std_ulogic; -- valid CSR access (implemented and valid priv. level)
signal csr_rw_valid : std_ulogic; -- valid CSR access (valid r/w access rights)
-- hardware trigger module --
signal hw_trigger_fire : std_ulogic; -- NOTE(review): presumably the match output of the hardware trigger - driven outside this section
begin
-- ****************************************************************************************************************************
-- Instruction Fetch (always fetch 32-bit-aligned 32-bit chunks of data)
-- ****************************************************************************************************************************
-- Fetch Engine FSM -----------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
fetch_engine_fsm: process(rstn_i, clk_i)
  -- opcode bits 6:2 of the instruction currently held by the execute engine
  variable exe_opcode_v  : std_ulogic_vector(6 downto 2);
  -- a restart is either buffered (restart) or requested right now (reset)
  variable restart_req_v : boolean;
begin
  if (rstn_i = '0') then
    fetch_engine.pc         <= (others => '0');
    fetch_engine.pmp_err    <= '0';
    fetch_engine.unaligned  <= '0'; -- first fetch after reset is always aligned
    fetch_engine.restart    <= '1'; -- asserted out of reset so the IPB gets cleared
    fetch_engine.state_prev <= IF_RESTART;
    fetch_engine.state      <= IF_RESTART;
  elsif rising_edge(clk_i) then
    -- helper values (pure combinational, evaluated from the current signal values) --
    exe_opcode_v  := execute_engine.i_reg(instr_opcode_msb_c downto instr_opcode_lsb_c+2);
    restart_req_v := (fetch_engine.restart = '1') or (fetch_engine.reset = '1');

    -- one-cycle-delayed copy of the state (used by the HPM logic) --
    fetch_engine.state_prev <= fetch_engine.state;

    -- restart request buffer: accumulate requests, drop them once IF_RESTART is being processed --
    fetch_engine.restart <= fetch_engine.restart or fetch_engine.reset;
    if (fetch_engine.state = IF_RESTART) then
      fetch_engine.restart <= '0';
    end if;

    -- state machine --
    case fetch_engine.state is

      when IF_RESTART => -- reload the fetch address from the execute engine's PC
        fetch_engine.state     <= IF_REQUEST;
        fetch_engine.unaligned <= execute_engine.pc(1); -- remember if the target sits in the upper half-word
        fetch_engine.pc        <= execute_engine.pc(XLEN-1 downto 2) & "00"; -- word-aligned fetch address

      when IF_REQUEST => -- issue request for the next aligned 32-bit word
        if (ipb.free = "11") then -- advance only when both IPB halves have space
          fetch_engine.state <= IF_PENDING;
        end if;
        fetch_engine.pmp_err <= i_pmp_fault_i; -- sample the PMP fault input

      when IF_PENDING => -- wait for the response; the IPB write happens outside this process
        if (fetch_engine.resp = '1') then
          fetch_engine.pmp_err   <= '0';
          fetch_engine.unaligned <= '0';
          fetch_engine.pc        <= std_ulogic_vector(unsigned(fetch_engine.pc) + 4);
          if restart_req_v then -- fast restart
            fetch_engine.state <= IF_RESTART;
          elsif (exe_opcode_v = opcode_branch_c(6 downto 2)) or -- conditional branch: might be taken
                (exe_opcode_v = opcode_jal_c(6 downto 2))    or -- JAL: will be taken
                (exe_opcode_v = opcode_jalr_c(6 downto 2))   then -- JALR: will be taken
            -- simple "always taken" prediction: do not start another fetch while a
            -- branch/jump is executing, wait for its destination instead;
            -- the two opcode LSBs are not compared (expected to be "11" for rv32)
            fetch_engine.state <= IF_WAIT;
          else -- keep fetching linearly
            fetch_engine.state <= IF_REQUEST;
          end if;
        end if;

      when IF_WAIT => -- hold off for one cycle while a branch/jump is executing
        if restart_req_v then -- fast restart (e.g. taken branch)
          fetch_engine.state <= IF_RESTART;
        else
          fetch_engine.state <= IF_REQUEST;
        end if;

      when others => -- undefined state: recover via restart
        fetch_engine.state <= IF_RESTART;

    end case;
  end if;
end process fetch_engine_fsm;
-- instruction fetch address: fetch PC with the two LSBs forced to zero (word-aligned) --
i_bus_addr_o <= fetch_engine.pc(XLEN-1 downto 2) & "00";

-- read request: only in IF_REQUEST and only while both IPB halves report free space --
i_bus_re_o <= '0' when (fetch_engine.state /= IF_REQUEST) or (ipb.free /= "11") else '1';

-- alignment error: an unaligned fetch can only be an error if the C extension is disabled --
fetch_engine.a_err <= '0' when (CPU_EXTENSION_RISCV_C = true) or (fetch_engine.unaligned /= '1') else '1';

-- fetch response: any of alignment error, PMP error, bus error or bus acknowledge terminates the access --
fetch_engine.resp <= '1' when (fetch_engine.a_err = '1')   or -- alignment error
                              (fetch_engine.pmp_err = '1') or -- PMP error
                              (i_bus_err_i = '1')          or -- bus access error
                              (i_bus_ack_i = '1')          else '0'; -- bus acknowledge

-- IPB write data per half-word: [17] = bus/PMP error, [16] = alignment error, [15:0] = instruction half-word --
ipb.wdata(1) <= (fetch_engine.pmp_err or i_bus_err_i) & fetch_engine.a_err & i_bus_rdata_i(31 downto 16);
ipb.wdata(0) <= (fetch_engine.pmp_err or i_bus_err_i) & fetch_engine.a_err & i_bus_rdata_i(15 downto 00);

-- IPB write enables: the high half is written on every response; the low half is skipped
-- when the fetch is unaligned and the C extension is enabled --
ipb.we(1) <= '1' when (fetch_engine.resp = '1') and (fetch_engine.state = IF_PENDING) else '0';
ipb.we(0) <= '1' when (fetch_engine.resp = '1') and (fetch_engine.state = IF_PENDING) and
                      ((CPU_EXTENSION_RISCV_C = false) or (fetch_engine.unaligned = '0')) else '0';
-- ****************************************************************************************************************************
-- Instruction Prefetch Buffer
-- ****************************************************************************************************************************
-- Instruction Prefetch Buffer (FIFO) -----------------------------------------------------
-- -------------------------------------------------------------------------------------------
prefetch_buffer:
for half in 0 to 1 generate -- one FIFO per half-word: 0 = low half, 1 = high half (each entry carries its status bits)
  prefetch_buffer_inst: neorv32_fifo
  generic map (
    FIFO_WIDTH => ipb.wdata(half)'length, -- width of one FIFO entry
    FIFO_DEPTH => CPU_IPB_ENTRIES,        -- number of entries; has to be a power of two; min 1
    FIFO_RSYNC => false,                  -- asynchronous read is REQUIRED here
    FIFO_SAFE  => false,                  -- no safe access; correct usage is ensured by the external control logic
    FIFO_GATE  => false                   -- no output gating
  )
  port map (
    -- clocking, reset, clear --
    clk_i   => clk_i,                -- clock, rising edge
    rstn_i  => rstn_i,               -- async reset, low-active
    clear_i => fetch_engine.restart, -- sync clear, high-active
    half_o  => open,                 -- "at least half full" flag is unused
    -- write side (fetch engine) --
    we_i    => ipb.we(half),         -- write enable
    wdata_i => ipb.wdata(half),      -- write data
    free_o  => ipb.free(half),       -- set when at least one entry is free
    -- read side (issue engine) --
    re_i    => ipb.re(half),         -- read enable
    rdata_o => ipb.rdata(half),      -- read data
    avail_o => ipb.avail(half)       -- set when data is available
  );
end generate;
-- ****************************************************************************************************************************
-- Instruction Issue (decompress 16-bit instructions and assemble a 32-bit instruction word)
-- ****************************************************************************************************************************
-- Issue Engine FSM Sync ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
issue_engine_fsm_sync: process(clk_i)
begin
  if rising_edge(clk_i) then
    if (CPU_EXTENSION_RISCV_C = false) then
      -- without compressed instructions every instruction starts at the low half-word
      issue_engine.align <= '0';
    elsif (fetch_engine.restart = '1') then
      -- restart: take the alignment from bit 1 of the execute engine's PC
      issue_engine.align <= execute_engine.pc(1);
    elsif (execute_engine.state = DISPATCH) then
      -- set/clear flip-flop, updated only while dispatching; set has priority over clear
      issue_engine.align <= issue_engine.align_set or (issue_engine.align and (not issue_engine.align_clr));
    end if;
  end if;
end process issue_engine_fsm_sync;
-- Issue Engine FSM Comb ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
issue_engine_fsm_comb: process(issue_engine, ipb)
  variable pair_avail_v  : std_ulogic;                    -- both half-word FIFOs have data
  variable pair_status_v : std_ulogic_vector(1 downto 0); -- OR-combined status bits of both half-words
begin
  -- shared sub-expressions --
  pair_avail_v  := ipb.avail(0) and ipb.avail(1);
  pair_status_v := ipb.rdata(0)(17 downto 16) or ipb.rdata(1)(17 downto 16);

  -- defaults: nothing valid, alignment flag unchanged --
  issue_engine.valid     <= "00";
  issue_engine.align_clr <= '0';
  issue_engine.align_set <= '0';

  -- issue word layout: [35] illegal compressed instr, [34:33] fetch status bits,
  -- [32] is-compressed flag, [31:0] (decompressed) instruction word

  if (CPU_EXTENSION_RISCV_C = true) and (issue_engine.align /= '0') then
    -- instruction starts at the HIGH half-word --
    if (ipb.rdata(1)(1 downto 0) = "11") then -- unaligned uncompressed: spans both FIFOs
      issue_engine.valid <= (others => pair_avail_v);
      issue_engine.data  <= '0' & pair_status_v &
                            '0' & (ipb.rdata(0)(15 downto 00) & ipb.rdata(1)(15 downto 00));
    else -- compressed
      issue_engine.align_clr <= ipb.avail(1); -- next instruction starts 32-bit-aligned again
      issue_engine.valid(1)  <= ipb.avail(1);
      issue_engine.data      <= issue_engine.ci_ill & ipb.rdata(1)(17 downto 16) & '1' & issue_engine.ci_i32;
    end if;
  else
    -- instruction starts at the LOW half-word --
    if (CPU_EXTENSION_RISCV_C = false) or (ipb.rdata(0)(1 downto 0) = "11") then -- aligned uncompressed
      issue_engine.valid <= (others => pair_avail_v);
      issue_engine.data  <= '0' & pair_status_v &
                            '0' & (ipb.rdata(1)(15 downto 00) & ipb.rdata(0)(15 downto 00));
    else -- compressed
      issue_engine.align_set <= ipb.avail(0); -- next instruction is NOT 32-bit-aligned
      issue_engine.valid(0)  <= ipb.avail(0);
      issue_engine.data      <= issue_engine.ci_ill & ipb.rdata(0)(17 downto 16) & '1' & issue_engine.ci_i32;
    end if;
  end if;
end process issue_engine_fsm_comb;
-- IPB read enables: pop a half-word FIFO when the execute engine is dispatching and
-- the issue engine marks that half-word as valid (i.e. it is consumed right now) --
ipb.re(1) <= '0' when (execute_engine.state /= DISPATCH) or (issue_engine.valid(1) /= '1') else '1';
ipb.re(0) <= '0' when (execute_engine.state /= DISPATCH) or (issue_engine.valid(0) /= '1') else '1';
-- Compressed Instructions Decoding -------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- C extension enabled: instantiate the 16-bit -> 32-bit instruction decompressor --
neorv32_cpu_decompressor_inst_true:
if CPU_EXTENSION_RISCV_C generate
  neorv32_cpu_decompressor_inst: neorv32_cpu_decompressor
  generic map (
    FPU_ENABLE => CPU_EXTENSION_RISCV_Zfinx -- floating-point instructions enabled
  )
  port map (
    ci_instr16_i => issue_engine.ci_i16, -- compressed instruction input
    ci_instr32_o => issue_engine.ci_i32, -- 32-bit decompressed instruction
    ci_illegal_o => issue_engine.ci_ill  -- illegal compressed instruction
  );
end generate;

-- C extension disabled: tie the decompressor outputs to constant zero --
neorv32_cpu_decompressor_inst_false:
if not CPU_EXTENSION_RISCV_C generate
  issue_engine.ci_ill <= '0';
  issue_engine.ci_i32 <= (others => '0');
end generate;
-- decompressor input: high half-word when the alignment flag is set, low half-word otherwise --
issue_engine.ci_i16 <= ipb.rdata(1)(15 downto 0) when (issue_engine.align /= '0') else ipb.rdata(0)(15 downto 0);
-- ****************************************************************************************************************************
-- Instruction Execution
-- ****************************************************************************************************************************
-- Immediate Generator --------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
imm_gen: process(clk_i)
  variable opcode_v : std_ulogic_vector(6 downto 0);
begin
  if rising_edge(clk_i) then
    -- default: I-immediate (ALU-immediate, loads, jump-and-link with register);
    -- the other formats below only override the bits in which they differ
    imm_o(XLEN-1 downto 11) <= (others => execute_engine.i_reg(31)); -- sign extension
    imm_o(10 downto 01)     <= execute_engine.i_reg(30 downto 21);
    imm_o(00)               <= execute_engine.i_reg(20);

    -- only opcode bits 6:2 are decoded; the two LSBs are always "11" for 32-bit instructions
    opcode_v := execute_engine.i_reg(instr_opcode_msb_c downto instr_opcode_lsb_c+2) & "11";
    case opcode_v is

      when opcode_store_c => -- S-immediate: imm[4:0] comes from the rd field
        imm_o(04 downto 00) <= execute_engine.i_reg(11 downto 07);

      when opcode_branch_c => -- B-immediate: imm[11] = instr[7], imm[4:1] = instr[11:8], imm[0] = 0
        imm_o(11)           <= execute_engine.i_reg(07);
        imm_o(04 downto 01) <= execute_engine.i_reg(11 downto 08);
        imm_o(00)           <= '0';

      when opcode_lui_c | opcode_auipc_c => -- U-immediate: upper 20 bits, lower 12 bits zero
        imm_o(XLEN-1 downto 12) <= execute_engine.i_reg(31 downto 12);
        imm_o(11 downto 00)     <= (others => '0');

      when opcode_jal_c => -- J-immediate: imm[19:12] = instr[19:12], imm[11] = instr[20], imm[0] = 0
        imm_o(19 downto 12) <= execute_engine.i_reg(19 downto 12);
        imm_o(11)           <= execute_engine.i_reg(20);
        imm_o(00)           <= '0';

      when others => -- I-immediate: defaults above already apply
        null;

    end case;
  end if;
end process imm_gen;
-- Branch Condition Check -----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
branch_check: process(execute_engine.i_reg, cmp_i)
  variable cmp_sel_v : std_ulogic; -- comparator flag selected by funct3's MSB
begin
  -- funct3(2) picks the comparator result: 0 = "equal" (beq/bne), 1 = "less" (blt[u]/bge[u])
  if (execute_engine.i_reg(instr_funct3_msb_c) = '0') then
    cmp_sel_v := cmp_i(cmp_equal_c);
  else
    cmp_sel_v := cmp_i(cmp_less_c);
  end if;
  -- funct3(0) inverts the condition (bne, bge, bgeu)
  execute_engine.branch_taken <= cmp_sel_v xor execute_engine.i_reg(instr_funct3_lsb_c);
end process branch_check;
-- Execute Engine FSM Sync ----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
execute_engine_fsm_sync: process(rstn_i, clk_i)
begin
  if (rstn_i = '0') then
    -- program counters --
    execute_engine.pc          <= CPU_BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit aligned boot address
    execute_engine.next_pc     <= (others => '0');
    execute_engine.pc_last     <= (others => '0');
    -- state: reset is treated like a branch from "somewhere" --
    execute_engine.state       <= BRANCHED;
    execute_engine.state_prev  <= BRANCHED;
    execute_engine.state_prev2 <= BRANCHED;
    execute_engine.branched    <= '1';
    -- instruction register and flags --
    execute_engine.i_reg       <= (others => '0');
    execute_engine.is_ci       <= '0';
    execute_engine.is_ici      <= '0';
    execute_engine.sleep       <= '0';
    -- control bus --
    ctrl                       <= (others => '0');
  elsif rising_edge(clk_i) then
    -- state history (state_prev / state_prev2 are used for the HPMs only) --
    execute_engine.state_prev2 <= execute_engine.state_prev;
    execute_engine.state_prev  <= execute_engine.state;
    execute_engine.state       <= execute_engine.state_nxt;

    -- registered copies of the combinational FSM outputs --
    execute_engine.is_ici   <= execute_engine.is_ici_nxt;
    execute_engine.is_ci    <= execute_engine.is_ci_nxt;
    execute_engine.i_reg    <= execute_engine.i_reg_nxt;
    execute_engine.branched <= execute_engine.branched_nxt;

    -- main control bus buffer --
    ctrl <= ctrl_nxt;

    -- sleep mode: forced off while in debug mode or while single-stepping --
    execute_engine.sleep <= execute_engine.sleep_nxt;
    if (CPU_EXTENSION_RISCV_DEBUG = true) and ((csr.dcsr_step = '1') or (debug_ctrl.running = '1')) then
      execute_engine.sleep <= '0';
    end if;

    -- PC of the "last executed" instruction (for trap handling): captured in EXECUTE --
    if (execute_engine.state = EXECUTE) then
      execute_engine.pc_last <= execute_engine.pc;
    end if;

    -- PC update; bit 0 is always forced to zero --
    if (execute_engine.pc_we = '1') and (execute_engine.pc_mux_sel = '0') then
      execute_engine.pc <= execute_engine.next_pc(XLEN-1 downto 1) & '0'; -- linear increment OR trap enter/exit
    elsif (execute_engine.pc_we = '1') then
      execute_engine.pc <= alu_add_i(XLEN-1 downto 1) & '0'; -- jump / taken branch
    end if;

    -- next PC logic --
    case execute_engine.state is

      when TRAP_START => -- entering a trap environment
        execute_engine.next_pc <= csr.mtvec(XLEN-1 downto 2) & "00"; -- normal trap entry
        if (CPU_EXTENSION_RISCV_DEBUG = true) then
          if (trap_ctrl.cause(5) = '1') then -- trap cause: debug mode (re-)entry
            execute_engine.next_pc <= CPU_DEBUG_PARK_ADDR; -- "parking loop" <normal_entry>
          elsif (debug_ctrl.running = '1') then -- any other exception INSIDE debug mode
            execute_engine.next_pc <= CPU_DEBUG_EXC_ADDR; -- "parking loop" <exception_entry>
          end if;
        end if;

      when TRAP_EXIT => -- leaving a trap environment
        if (CPU_EXTENSION_RISCV_DEBUG = true) and (debug_ctrl.running = '1') then
          execute_engine.next_pc <= csr.dpc(XLEN-1 downto 1) & '0'; -- debug mode exit
        else
          execute_engine.next_pc <= csr.mepc(XLEN-1 downto 1) & '0'; -- normal trap exit
        end if;

      when EXECUTE => -- next linear PC
        execute_engine.next_pc <= std_ulogic_vector(unsigned(execute_engine.pc) + unsigned(execute_engine.next_pc_inc));

      when others => -- keep next_pc
        null;

    end case;
  end if;
end process execute_engine_fsm_sync;
-- linear PC increment: +2 only for a compressed instruction with the C extension enabled, +4 otherwise --
execute_engine.next_pc_inc(3 downto 0)      <= "0010" when (CPU_EXTENSION_RISCV_C = true) and (execute_engine.is_ci /= '0') else "0100";
execute_engine.next_pc_inc(XLEN-1 downto 4) <= (others => '0');

-- PC outputs (bit 0 is always driven as zero) --
next_pc_o <= execute_engine.next_pc(XLEN-1 downto 1) & '0'; -- next PC
curr_pc_o <= execute_engine.pc(XLEN-1 downto 1) & '0'; -- current PC
-- CPU Control Bus Output -----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
ctrl_output: process(ctrl, fetch_engine, trap_ctrl, execute_engine, csr, debug_ctrl)
begin
  -- start from the registered control bus; every assignment below overrides individual fields --
  ctrl_o <= ctrl;

  -- commit control: no register file write-back while an illegal-instruction exception is buffered --
  ctrl_o(ctrl_rf_wb_en_c) <= (not trap_ctrl.exc_buf(exc_iillegal_c)) and ctrl(ctrl_rf_wb_en_c);

  -- current (effective) privilege level --
  ctrl_o(ctrl_priv_mode_c) <= csr.privilege_eff;

  -- privilege level used for loads/stores: mstatus.MPP replaces the effective level when mstatus.MPRV is set --
  ctrl_o(ctrl_bus_priv_c) <= csr.privilege_eff;
  if (csr.mstatus_mprv = '1') then
    ctrl_o(ctrl_bus_priv_c) <= csr.mstatus_mpp;
  end if;

  -- register addresses, taken directly from the instruction register --
  ctrl_o(ctrl_rf_rs1_adr4_c downto ctrl_rf_rs1_adr0_c) <= execute_engine.i_reg(instr_rs1_msb_c downto instr_rs1_lsb_c);
  ctrl_o(ctrl_rf_rs2_adr4_c downto ctrl_rf_rs2_adr0_c) <= execute_engine.i_reg(instr_rs2_msb_c downto instr_rs2_lsb_c);
  ctrl_o(ctrl_rf_rs3_adr4_c downto ctrl_rf_rs3_adr0_c) <= execute_engine.i_reg(instr_rs3_msb_c downto instr_rs3_lsb_c);
  ctrl_o(ctrl_rf_rd_adr4_c  downto ctrl_rf_rd_adr0_c)  <= execute_engine.i_reg(instr_rd_msb_c  downto instr_rd_lsb_c);

  -- instruction function fields: opcode, funct12, funct3 --
  ctrl_o(ctrl_ir_opcode7_6_c  downto ctrl_ir_opcode7_0_c) <= execute_engine.i_reg(instr_opcode_msb_c  downto instr_opcode_lsb_c);
  ctrl_o(ctrl_ir_funct12_11_c downto ctrl_ir_funct12_0_c) <= execute_engine.i_reg(instr_funct12_msb_c downto instr_funct12_lsb_c);
  ctrl_o(ctrl_ir_funct3_2_c   downto ctrl_ir_funct3_0_c)  <= execute_engine.i_reg(instr_funct3_msb_c  downto instr_funct3_lsb_c);

  -- CPU status flags --
  ctrl_o(ctrl_sleep_c)         <= execute_engine.sleep;   -- cpu is in sleep mode
  ctrl_o(ctrl_trap_c)          <= trap_ctrl.env_start_ack; -- cpu is starting a trap handler
  ctrl_o(ctrl_debug_running_c) <= debug_ctrl.running;     -- cpu is currently in debug mode

  -- FPU rounding mode (frm CSR) --
  ctrl_o(ctrl_alu_frm2_c downto ctrl_alu_frm0_c) <= csr.frm;
end process ctrl_output;
-- Decoding Helper Logic ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
decode_helper: process(execute_engine)
  -- instruction fields, extracted once to keep the decode conditions short
  variable f7_v     : std_ulogic_vector(6 downto 0); -- funct7
  variable f3_v     : std_ulogic_vector(2 downto 0); -- funct3
  variable f12lo_v  : std_ulogic_vector(4 downto 0); -- lowest five bits of funct12
  variable fp_hit_v : boolean;                       -- funct7(6:2)/funct3/funct12 match a supported FPU operation
begin
  f7_v    := execute_engine.i_reg(instr_funct7_msb_c downto instr_funct7_lsb_c);
  f3_v    := execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c);
  f12lo_v := execute_engine.i_reg(instr_funct12_lsb_c+4 downto instr_funct12_lsb_c);

  -- defaults: no special instruction class detected --
  decode_aux.is_f_op  <= '0';
  decode_aux.is_m_mul <= '0';
  decode_aux.is_m_div <= '0';
  decode_aux.is_b_imm <= '0';
  decode_aux.is_b_reg <= '0';
  decode_aux.rs1_zero <= '0';
  decode_aux.rd_zero  <= '0';

  -- BITMANIP: these encodings share the ALU/ALUI instruction space --
  if (CPU_EXTENSION_RISCV_B = true) then

    -- register-immediate operations --
    case f7_v is
      when "0110000" =>
        if (f3_v = "101") then -- RORI
          decode_aux.is_b_imm <= '1';
        elsif (f3_v = "001") then
          case f12lo_v is
            when "00000" | "00001" | "00010" | "00100" | "00101" => -- CLZ / CTZ / CPOP / SEXT.B / SEXT.H
              decode_aux.is_b_imm <= '1';
            when others =>
              null;
          end case;
        end if;
      when "0010100" =>
        if (f3_v = "001") or -- BSETI
           ((f3_v = "101") and (f12lo_v = "00111")) then -- ORCB
          decode_aux.is_b_imm <= '1';
        end if;
      when "0100100" | "0110100" => -- BCLRI / BEXTI | REV8 / BINVI
        if (f3_v(1 downto 0) = "01") then
          decode_aux.is_b_imm <= '1';
        end if;
      when others =>
        null;
    end case;

    -- register-register operations --
    case f7_v is
      when "0110000" | "0100100" => -- ROR / ROL | BCLR / BEXT
        if (f3_v(1 downto 0) = "01") then
          decode_aux.is_b_reg <= '1';
        end if;
      when "0000101" => -- MIN[U] / MAX[U] / CMUL[H/R]
        if (f3_v /= "000") then
          decode_aux.is_b_reg <= '1';
        end if;
      when "0000100" => -- ZEXTH
        if (f3_v = "100") then
          decode_aux.is_b_reg <= '1';
        end if;
      when "0110100" | "0010100" => -- BINV | BSET
        if (f3_v = "001") then
          decode_aux.is_b_reg <= '1';
        end if;
      when "0100000" => -- ANDN / ORN / XORN
        if (f3_v = "111") or (f3_v = "110") or (f3_v = "100") then
          decode_aux.is_b_reg <= '1';
        end if;
      when "0010000" => -- SH1ADD / SH2ADD / SH3ADD
        if (f3_v = "010") or (f3_v = "100") or (f3_v = "110") then
          decode_aux.is_b_reg <= '1';
        end if;
      when others =>
        null;
    end case;

  end if;

  -- floating-point operations (Zfinx) --
  if (CPU_EXTENSION_RISCV_Zfinx = true) then
    fp_hit_v := (f7_v(6 downto 3) = "0000") or -- FADD.S / FSUB.S
                (f7_v(6 downto 2) = "00010") or -- FMUL.S
                ((f7_v(6 downto 2) = "11100") and (f3_v = "001")) or -- FCLASS.S
                ((f7_v(6 downto 2) = "00100") and (f3_v(2) = '0')) or -- FSGNJ[N/X].S
                ((f7_v(6 downto 2) = "00101") and (f3_v(2 downto 1) = "00")) or -- FMIN.S / FMAX.S
                ((f7_v(6 downto 2) = "10100") and (f3_v(2) = '0')) or -- FEQ.S / FLT.S / FLE.S
                ((f7_v(6 downto 2) = "11010") and (f12lo_v(4 downto 1) = "0000")) or -- FCVT.S.W*
                ((f7_v(6 downto 2) = "11000") and (f12lo_v(4 downto 1) = "0000")); -- FCVT.W*.S
    -- the two funct7 LSBs select the format: single-precision operations only
    if fp_hit_v and (f7_v(1 downto 0) = float_single_c) then
      decode_aux.is_f_op <= '1';
    end if;
  end if;

  -- integer MUL (M/Zmmul) / DIV (M): funct7 = 0000001, funct3(2) separates mul (0) from div (1) --
  if (execute_engine.i_reg(instr_opcode_lsb_c+5) = opcode_alu_c(5)) and (f7_v = "0000001") then
    if (f3_v(2) = '0') and ((CPU_EXTENSION_RISCV_M = true) or (CPU_EXTENSION_RISCV_Zmmul = true)) then
      decode_aux.is_m_mul <= '1';
    end if;
    if (f3_v(2) = '1') and (CPU_EXTENSION_RISCV_M = true) then
      decode_aux.is_m_div <= '1';
    end if;
  end if;

  -- register/uimm5 checks: rs1 field is all-zero / rd field is all-zero --
  if (execute_engine.i_reg(instr_rs1_msb_c downto instr_rs1_lsb_c) = "00000") then
    decode_aux.rs1_zero <= '1';
  end if;
  if (execute_engine.i_reg(instr_rd_msb_c downto instr_rd_lsb_c) = "00000") then
    decode_aux.rd_zero <= '1';
  end if;
end process decode_helper;
-- CSR access address --
-- Taken directly from the 12-bit immediate field of the instruction currently in the
-- execute engine's instruction register (no additional register stage).
csr.addr <= execute_engine.i_reg(instr_imm12_msb_c downto instr_imm12_lsb_c);
-- Execute Engine FSM Comb ----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
execute_engine_fsm_comb: process(execute_engine, debug_ctrl, trap_ctrl, decode_aux, fetch_engine, issue_engine, csr, ctrl, alu_idone_i, bus_d_wait_i)
begin
-- arbiter defaults --
execute_engine.state_nxt <= execute_engine.state;
execute_engine.i_reg_nxt <= execute_engine.i_reg;
execute_engine.is_ci_nxt <= execute_engine.is_ci;
execute_engine.is_ici_nxt <= '0';
execute_engine.sleep_nxt <= execute_engine.sleep;
execute_engine.branched_nxt <= execute_engine.branched;
execute_engine.pc_mux_sel <= '0';
execute_engine.pc_we <= '0';
-- instruction dispatch defaults --
fetch_engine.reset <= '0';
-- trap environment control defaults --
trap_ctrl.env_start_ack <= '0';
trap_ctrl.env_end <= '0';
trap_ctrl.instr_be <= '0';
trap_ctrl.instr_ma <= '0';
trap_ctrl.env_call <= '0';
trap_ctrl.break_point <= '0';
debug_ctrl.dret <= '0';
-- CSR access defaults --
csr.we_nxt <= '0';
csr.re_nxt <= '0';
-- CONTROL defaults --
ctrl_nxt <= (others => '0'); -- default: all off
ctrl_nxt(ctrl_alu_op2_c downto ctrl_alu_op0_c) <= alu_op_add_c; -- default ALU operation: ADD
ctrl_nxt(ctrl_rf_mux1_c downto ctrl_rf_mux0_c) <= rf_mux_alu_c; -- default RF input: ALU
-- ALU sign control --
if (execute_engine.i_reg(instr_opcode_lsb_c+4) = '1') then -- ALU ops
ctrl_nxt(ctrl_alu_unsigned_c) <= execute_engine.i_reg(instr_funct3_lsb_c+0); -- unsigned ALU operation? (SLTIU, SLTU)
else -- branches
ctrl_nxt(ctrl_alu_unsigned_c) <= execute_engine.i_reg(instr_funct3_lsb_c+1); -- unsigned branches? (BLTU, BGEU)
end if;
-- state machine --
case execute_engine.state is
when DISPATCH => -- Get new command from instruction issue engine
-- ------------------------------------------------------------
-- update PC and compressed instruction status flags --
execute_engine.pc_mux_sel <= '0'; -- next PC
execute_engine.pc_we <= not execute_engine.branched; -- update PC with next_pc if there was no actual branch
execute_engine.is_ci_nxt <= issue_engine.data(32); -- this is a de-compressed instruction
execute_engine.is_ici_nxt <= issue_engine.data(35); -- this is an illegal compressed instruction
--
if (issue_engine.valid(0) = '1') or (issue_engine.valid(1) = '1') then -- instruction available?
-- update IR *only* if we have a new instruction word available as this register must not contain non-defined values --
execute_engine.i_reg_nxt <= issue_engine.data(31 downto 0); -- <has to stay here>
-- clear branch flipflop --
execute_engine.branched_nxt <= '0';
-- instruction fetch exceptions --
trap_ctrl.instr_ma <= issue_engine.data(33) and (not bool_to_ulogic_f(CPU_EXTENSION_RISCV_C)); -- misaligned instruction fetch (if C disabled)
trap_ctrl.instr_be <= issue_engine.data(34); -- bus access fault during instruction fetch
-- any reason to go to trap state? --
if (execute_engine.sleep = '1') or -- enter sleep state
(trap_ctrl.exc_fire = '1') or -- exception during LAST instruction (e.g. illegal instruction)
(trap_ctrl.env_start = '1') or -- pending trap (IRQ or late exception)
((issue_engine.data(33) = '1') and (CPU_EXTENSION_RISCV_C = false)) or -- misaligned instruction fetch address (if C disabled) during instruction fetch
(issue_engine.data(34) = '1') then -- bus access fault during instruction fetch
execute_engine.state_nxt <= TRAP_ENTER;
else
execute_engine.state_nxt <= EXECUTE;
end if;
end if;
when TRAP_ENTER => -- Begin trap environment; stay here for sleep mode
-- ------------------------------------------------------------
-- this also serves as additional "delay" cycle to wait for (other) potential
-- sync. exceptions to reach the trap controller logic (issue #325)
if (trap_ctrl.env_start = '1') then -- trap triggered?
execute_engine.state_nxt <= TRAP_START;
end if;
when TRAP_START => -- Start trap environment and get trap vector
-- ------------------------------------------------------------
trap_ctrl.env_start_ack <= '1';
execute_engine.state_nxt <= TRAP_EXECUTE;
when TRAP_EXIT => -- Return from trap environment and get xEPC
-- ------------------------------------------------------------
trap_ctrl.env_end <= '1';
execute_engine.state_nxt <= TRAP_EXECUTE;
when TRAP_EXECUTE => -- Process trap environment
-- ------------------------------------------------------------
execute_engine.pc_mux_sel <= '0'; -- next_PC (xEPC or trap vector)
fetch_engine.reset <= '1';
execute_engine.pc_we <= '1';
execute_engine.sleep_nxt <= '0'; -- disable sleep mode
execute_engine.state_nxt <= BRANCHED;
when EXECUTE => -- Decode and execute instruction (control has to be here for exactly 1 cycle in any case!)
-- NOTE: register file is read in this stage; due to the sync read, data will be available in the _next_ state
-- ------------------------------------------------------------
case execute_engine.i_reg(instr_opcode_msb_c downto instr_opcode_lsb_c) is
when opcode_alu_c | opcode_alui_c => -- register/immediate ALU operation
-- ------------------------------------------------------------
-- register-immediate ALU operation --
if (execute_engine.i_reg(instr_opcode_msb_c-1) = '0') then
ctrl_nxt(ctrl_alu_opb_mux_c) <= '1'; -- use IMM as ALU.OPB
end if;
-- ALU core operation --
case execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) is -- actual ALU.logic operation (re-coding)
when funct3_subadd_c => -- ADD(I), SUB
if ((execute_engine.i_reg(instr_opcode_msb_c-1) = '1') and (execute_engine.i_reg(instr_funct7_msb_c-1) = '1')) then -- not an immediate op and funct7.6 set => SUB
ctrl_nxt(ctrl_alu_op2_c downto ctrl_alu_op0_c) <= alu_op_sub_c;
else
ctrl_nxt(ctrl_alu_op2_c downto ctrl_alu_op0_c) <= alu_op_add_c;
end if;
when funct3_slt_c | funct3_sltu_c => -- SLT(I), SLTU(I)
ctrl_nxt(ctrl_alu_op2_c downto ctrl_alu_op0_c) <= alu_op_slt_c;
when funct3_xor_c => -- XOR(I)
ctrl_nxt(ctrl_alu_op2_c downto ctrl_alu_op0_c) <= alu_op_xor_c;