Skip to content

Commit

Permalink
armstubs: Add BCM2711 support
Browse files Browse the repository at this point in the history
A summary commit of all 2711-related work by many people:

* Change OSC to 54MHz
* Add L2 read/write latency
* Add GIC support and unconditionally setup the arch timer prescale
* Don't enable the data cache before the MMU is enabled.
* Enable the Cache after setting SMP bit. Cortex A72 manual 4.3.67
  says SMP must be set before enabling the cache. Probably doesn't
  make any real difference.

N.B. armstub8-32 is for 2710 only and armstub8-32-gic is for 2711,
i.e. the GIC flag is also effectively a 2711 flag.

Signed-off-by: Phil Elwell <phil@raspberrypi.org>
  • Loading branch information
popcornmix authored and Phil Elwell committed Jun 24, 2019
1 parent 5caa704 commit 7f4a937
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 17 deletions.
25 changes: 22 additions & 3 deletions armstubs/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
BINS=armstub.bin armstub7.bin armstub8-32.bin armstub8.bin
BINS=armstub.bin armstub7.bin armstub8-32.bin armstub8-32-gic.bin armstub8.bin armstub8-gic.bin

CC8=aarch64-linux-gnu-gcc
LD8=aarch64-linux-gnu-ld
Expand All @@ -20,17 +20,30 @@ clean :
# Object files.
# CC8 is the aarch64 compiler; the %8 targets are built from the %8.S source.
%8.o: %8.S
$(CC8) -c $< -o $@

# Same %8.S source, compiled with GIC=1 so the "#ifdef GIC" code is included.
%8-gic.o: %8.S
$(CC8) -DGIC=1 -c $< -o $@

# %8-32 targets are built from the %7.S source with BCM2710=1.
%8-32.o: %7.S
$(CC) -DBCM2710=1 -c $< -o $@

# %8-32-gic is the BCM2711 variant of the %7.S source: GIC=1 and BCM2711=1
# are added on top of BCM2710=1.
%8-32-gic.o: %7.S
$(CC) -DGIC=1 -DBCM2710=1 -DBCM2711=1 -c $< -o $@

# Fallback for every other stub: BCM2710=0, BCM2711 and GIC left undefined.
%.o: %.S
$(CC) -DBCM2710=0 -c $< -o $@

# Linking. The %8 stubs place .text at address 0 using LD8.
%8-gic.elf: %8-gic.o
$(LD8) --section-start=.text=0 $< -o $@

%8.elf: %8.o
$(LD8) --section-start=.text=0 $< -o $@

# All other stubs place .init at address 0 using LD.
%.elf: %.o
$(LD) --section-start=.init=0 $< -o $@

# Raw binary images extracted from the %8 ELF files with OBJCOPY8.
%8-gic.tmp: %8-gic.elf
$(OBJCOPY8) $< -O binary $@

%8.tmp: %8.elf
$(OBJCOPY8) $< -O binary $@

Expand All @@ -39,7 +52,7 @@ clean :


%.bin: %.tmp
dd if=$< ibs=256 count=1 of=$@ conv=sync
dd if=$< ibs=512 count=1 of=$@ conv=sync

%8.ds: %8.bin
$(OBJDUMP8) -D --target binary $< > $@
Expand All @@ -53,7 +66,7 @@ clean :
$(BIN2C): bin2c.c
gcc $< -o $@

armstubs.h: armstub.C armstub7.C armstub8-32.C armstub8.C
armstubs.h: armstub.C armstub7.C armstub8-32.C armstub8-32-gic.C armstub8.C armstub8-gic.C
echo 'static const unsigned armstub[] = {' > $@
cat armstub.C >> $@
echo '};' >> $@
Expand All @@ -63,7 +76,13 @@ armstubs.h: armstub.C armstub7.C armstub8-32.C armstub8.C
echo 'static const unsigned armstub8_32[] = {' >> $@
cat armstub8-32.C >> $@
echo '};' >> $@
echo 'static const unsigned armstub8_32_gic[] = {' >> $@
cat armstub8-32-gic.C >> $@
echo '};' >> $@
echo 'static const unsigned armstub8[] = {' >> $@
cat armstub8.C >> $@
echo '};' >> $@
echo 'static const unsigned armstub8_gic[] = {' >> $@
cat armstub8-gic.C >> $@
echo '};' >> $@

91 changes: 80 additions & 11 deletions armstubs/armstub7.S
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* the vector table for secure state and HYP mode */
_start:
b jmp_loader /* reset */
#if defined(BCM2711) && (BCM2711 == 1)
osc: .word 54000000
#else
osc: .word 19200000
#endif

/*
* secure monitor handler
Expand All @@ -48,38 +52,108 @@ _secure_monitor:

movw r0, #0x1da @ Set HYP_MODE | F_BIT | I_BIT | A_BIT
msr spsr_cxfs, r0 @ Set full SPSR

#if defined(BCM2711) && (BCM2711 == 1)
mrc p15, 1, r1, c9, c0, 2 @ Read L2CTLR
orr r1, r1, #0x22 @ L2 data RAM latency = 3 cycles (bits [2:0] = 2), data RAM setup = 1 cycle (bit 5)

This comment has been minimized.

Copy link
@pelwell

pelwell Sep 19, 2019

Contributor

This comment is also wrong.

mcr p15, 1, r1, c9, c0, 2 @ Write L2CTLR
#endif

movs pc, lr @ return to non-secure SVC

/* Literal words loaded PC-relative by the stub code. */
value: .word 0x63fff @ written to NSACR in jmp_loader
machid: .word 3138
#if defined(BCM2711) && (BCM2711 == 1)
mbox: .word 0xFF80008C
#else
mbox: .word 0x4000008C
#endif
prescaler: .word 0xff800008 @ jmp_loader stores 0x80000000 here on BCM2711 builds
GIC_DISTB: .word 0xff841000 @ GIC distributor base, loaded by setup_gic
GIC_CPUB: .word 0xff842000 @ GIC CPU interface base, loaded by setup_gic

/* Register offsets from GIC_CPUB (GICC_*) and GIC_DISTB (GICD_*). */
#define GICC_CTRLR 0x0
#define GICC_PMR 0x4
#define IT_NR 0x7 @ GICD_IGROUPR loop count in setup_gic, one more word is written after the loop (8 words, 256 total irqs)
#define GICD_CTRLR 0x0
#define GICD_IGROUPR 0x80

@ Shoehorn the GIC code between the reset vector and fixed-offset magic numbers at 240b

@ setup_gic: called from secure mode with bl, returns with mov pc, lr.
@   - stores 3 to GICD_CTRLR (group 0 and group 1 enable) unless MPIDR[1:0] is 0
@   - stores 0x1e7 to GICC_CTRLR and 0xff to GICC_PMR
@   - stores all-ones to IT_NR + 1 consecutive GICD_IGROUPR words (group 1)
@ Uses r0-r2 and the flags, no stack.
@ NOTE(review): the branch below skips the distributor enable on the core
@ whose MPIDR[1:0] is zero (labelled primary), so only the other cores
@ perform that store - confirm this is intended.
setup_gic:
        mrc     p15, 0, r0, c0, c0, 5   @ r0 = MPIDR
        tst     r0, #3                  @ core number lives in MPIDR[1:0]
        beq     2f                      @ zero: primary core, skip distributor enable
        ldr     r2, GIC_DISTB
        mov     r0, #3                  @ Enable group 0 and 1 IRQs from distributor
        str     r0, [r2, #GICD_CTRLR]
2:
        ldr     r0, GIC_CPUB            @ one base load serves both CPU interface stores
        movw    r1, #0x1e7
        str     r1, [r0, #GICC_CTRLR]   @ Enable group 1 IRQs from CPU interface
        mov     r1, #0xff
        str     r1, [r0, #GICC_PMR]     @ priority mask
        ldr     r2, GIC_DISTB
        add     r2, r2, #GICD_IGROUPR   @ r2 = first group register
        mvn     r1, #0                  @ group 1 all the things
        mov     r0, #IT_NR
3:
        str     r1, [r2], #4            @ store, then step to the next group register
        subs    r0, r0, #1
        bne     3b
        str     r1, [r2]                @ final group register after the counted loop
        mov     pc, lr

.org 0xf0
.word 0x5afe570b @ magic value to indicate firmware should overwrite atags and kernel
.word 0 @ version
atags: .word 0x0 @ device tree address
kernel: .word 0x0 @ kernel start address

jmp_loader:
@ Check which proc we are and run proc 0 only
#ifdef GIC
bl setup_gic
#endif

mrc p15, 0, r0, c1, c0, 0 @ Read System Control Register
orr r0, r0, #(1<<2) @ cache enable
orr r0, r0, #(1<<12) @ icache enable
mcr p15, 0, r0, c1, c0, 0 @ Write System Control Register
.if !BCM2710
mrc p15, 0, r0, c1, c0, 1 @ Read Auxiliary Control Register
orr r0, r0, #(1<<6) @ SMP
mcr p15, 0, r0, c1, c0, 1 @ Write Auxiliary Control Register
.else
mrrc p15, 1, r0, r1, c15 @ CPU Extended Control Register
orr r0, r0, #(1<<6) @ SMP
and r1, r1, #(~3) @ Set L2 load data prefetch to 0b00 = 16
mcrr p15, 1, r0, r1, c15 @ CPU Extended Control Register
.endif

mrc p15, 0, r0, c1, c0, 0 @ Read System Control Register
/* Cortex A72 manual 4.3.67 says SMP must be set before enabling the cache. */
#ifndef BCM2711
orr r0, r0, #(1<<2) @ cache enable
#endif
orr r0, r0, #(1<<12) @ icache enable
mcr p15, 0, r0, c1, c0, 0 @ Write System Control Register
mov r0, #1
mcr p15, 0, r0, c14, c3, 1 @ CNTV_CTL (enable=1, imask=0)

@ set to non-sec
ldr r1, value @ value = 0x63fff
mcr p15, 0, r1, c1, c1, 2 @ NSACR = all copros to non-sec
@ timer frequency
ldr r1, osc @ osc = 19200000
ldr r1, osc @ osc = 19.2 / 54MHz
mcr p15, 0, r1, c14, c0, 0 @ write CNTFRQ
#if defined(BCM2711) && (BCM2711 == 1)
mov r1, #0x80000000 @ Set ARM_LOCAL_TIMER_PRE_ADD to 1
ldr r2, prescaler
str r1, [r2]
#endif

adr r1, _start
mcr p15, 0, r1, c12, c0, 1 @ set MVBAR to secure vectors
Expand Down Expand Up @@ -122,8 +196,3 @@ jmp_loader:
wfi
b 10b

.org 0xf0
.word 0x5afe570b @ magic value to indicate firmware should overwrite atags and kernel
.word 0 @ version
atags: .word 0x0 @ device tree address
kernel: .word 0x0 @ kernel start address
54 changes: 51 additions & 3 deletions armstubs/armstub8.S
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@

#define BIT(x) (1 << (x))

/*
 * SoC-specific local peripheral addresses and oscillator frequency.
 *
 * Each macro is defined exactly once. The GIC flag doubles as the BCM2711
 * flag for this stub (the -gic build is the only one that passes it), so
 * GIC builds use the 0xff80xxxx local block and the 54MHz oscillator,
 * while non-GIC builds keep the 0x4000xxxx block and the 19.2MHz one.
 */
#ifdef GIC
#define LOCAL_CONTROL 0xff800000
#define LOCAL_PRESCALER 0xff800008
#define OSC_FREQ 54000000
#else
#define LOCAL_CONTROL 0x40000000
#define LOCAL_PRESCALER 0x40000008
#define OSC_FREQ 19200000
#endif

/* GIC distributor and CPU interface bases, used by setup_gic. */
#define GIC_DISTB 0xff841000
#define GIC_CPUB 0xff842000

#define SCR_RW BIT(10)
#define SCR_HCE BIT(8)
Expand All @@ -54,6 +56,15 @@
#define SPSR_EL3_VAL \
(SPSR_EL3_D | SPSR_EL3_A | SPSR_EL3_I | SPSR_EL3_F | SPSR_EL3_MODE_EL2H)

/* L2 control register, named by its generic S<op0>_<op1>_<Cn>_<Cm>_<op2> form for mrs/msr. */
#define L2CTLR_EL1 S3_1_C11_C0_2


/* Register offsets from GIC_CPUB (GICC_*) and GIC_DISTB (GICD_*), used by setup_gic. */
#define GICC_CTRLR 0x0
#define GICC_PMR 0x4
#define IT_NR 0x8 // Number of GICD_IGROUPR words written by setup_gic (256 total irqs)
#define GICD_CTRLR 0x0
#define GICD_IGROUPR 0x80

.globl _start
_start:
/*
Expand All @@ -67,6 +78,12 @@ _start:
mov w1, 0x80000000
str w1, [x0, #(LOCAL_PRESCALER - LOCAL_CONTROL)]

/* Set L2 data RAM latency to 3 cycles and data RAM setup to 1 cycle (L2CTLR |= 0x22) */

This comment has been minimized.

Copy link
@wtarreau

wtarreau Sep 19, 2019

The comment says the R/W cache latency is set to 2 but the A72 TRM says:
[0:2]=2 => 3 cycles data RAM latency (2 being the default)
[5]=1 => 1 cycle data RAM setup (0 being the default)

On the A53 it is:
[0]=1 => 3 cycles output latency (default 2)
[5]=1 => 2 cycles input latency (default 1)

http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0500d/BABDFHIH.html

Note that the A53 TRM only supports bits 0 and 5 being set, thus prevents value 0x22. I am suspecting that the intent was to set this to 0x20 which would mean "2 cycles R/W" on the A53 and remain "1 cycle setup" and "2 cycles latency" on A72. Was this set to these values on purpose (hence the comment is wrong) or was this an accidental mistake resulting in one extra cycle RAM latency ?

This comment has been minimized.

Copy link
@wtarreau

wtarreau Sep 19, 2019

I tried replacing the value with 0x20 or 0x21 on my pi4 and the system hangs a few seconds after boot, so I think that the value is correct and that it's the comment which is wrong, it's 3 cycles latency.

This comment has been minimized.

Copy link
@popcornmix

popcornmix Sep 19, 2019

Author Contributor

Yes, I think the value is correct but the comment is wrong.
If you wanted to submit a PR with a correction to the comment we'd be happy to accept it.

This comment has been minimized.

Copy link
@wtarreau

wtarreau Sep 19, 2019

I may possibly do it later if nobody beats me to it. PRs take quite some time just to change a single digit in two files, somebody who already has the repo cloned will do it way faster.

This comment has been minimized.

Copy link
@pelwell

pelwell Sep 23, 2019

Contributor

You don't need to clone the repo - I just patched both using the GitHub GUI in 2 1/2 minutes, the only downside being that each modified file became a separate commit.

This comment has been minimized.

Copy link
@wtarreau

wtarreau Sep 23, 2019

Thanks, I didn't know it was possible to edit from within the GUI. And indeed I wouldn't have liked to send you 2 commits for one change :-/

mrs x0, L2CTLR_EL1
mov x1, #0x22
orr x0, x0, x1
msr L2CTLR_EL1, x0

/* Set up CNTFRQ_EL0 */
ldr x0, =OSC_FREQ
msr CNTFRQ_EL0, x0
Expand All @@ -87,6 +104,9 @@ _start:
mov x0, #CPUECTLR_EL1_SMPEN
msr CPUECTLR_EL1, x0

#ifdef GIC
bl setup_gic
#endif
/*
* Set up SCTLR_EL2
* All set bits below are res1. LE, no WXN/I/SA/C/A/M
Expand Down Expand Up @@ -162,5 +182,33 @@ kernel_entry32:
.word 0x0

.org 0x100

#ifdef GIC

// setup_gic: called from secure mode with bl, returns with ret.
//   - stores 3 to GICD_CTRLR (group 0 and group 1 enable) unless MPIDR_EL1[1:0] is 0
//   - stores 0x1e7 to GICC_CTRLR and 0xff to GICC_PMR
//   - stores all-ones to IT_NR consecutive GICD_IGROUPR words, highest offset first
// Uses x0-x2 and the flags, no stack.
// NOTE(review): the distributor enable is skipped on the core whose
// MPIDR_EL1[1:0] is zero (labelled primary) - confirm this is intended.
setup_gic:
        ldr     x2, =GIC_DISTB          // x2 = distributor base
        mrs     x0, MPIDR_EL1
        and     x0, x0, #0x3            // core number
        cbz     x0, 2f                  // primary core

        mov     w0, #3                  // Enable group 0 and 1 IRQs from distributor
        str     w0, [x2, #GICD_CTRLR]
2:
        add     x1, x2, #(GIC_CPUB - GIC_DISTB) // x1 = CPU interface base
        mov     w0, #0x1e7
        str     w0, [x1, #GICC_CTRLR]   // Enable group 1 IRQs from CPU interface
        mov     w0, #0xff
        str     w0, [x1, #GICC_PMR]     // priority mask
        add     x2, x2, #GICD_IGROUPR   // x2 = first group register
        mov     w1, #~0                 // group 1 all the things
        mov     x0, #(IT_NR * 4)        // byte offset just past the last group register
3:
        subs    x0, x0, #4              // step down one word, Z set when offset hits 0
        str     w1, [x2, x0]
        b.ne    3b
        ret

#endif

.globl dtb_space
dtb_space:

0 comments on commit 7f4a937

Please sign in to comment.