Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sw: Support bare-metal SMP in software stack #85

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 47 additions & 3 deletions hw/bootrom/cheshire_bootrom.S
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,51 @@

// TODO: Avoid hardcoding in addresses and offsets

#include "smp.h"
// The hart that non-SMP tests should run on
#ifndef NONSMP_HART
#define NONSMP_HART 0
#endif

// Let the non-SMP hart (NONSMP_HART) continue past this macro; every other
// hart branches forward to the next local label `2`. That label is not defined
// here: bootrom_smp_resume defines one, where harts wait in a wfi loop.
//   reg1, reg2: scratch registers, both overwritten.
.macro bootrom_smp_pause reg1, reg2
li \reg2, 0x8 // bit 3 of mie: M-mode software interrupt enable
csrw mie, \reg2 // csrw replaces the whole CSR, so only bit 3 stays set
li \reg1, NONSMP_HART
csrr \reg2, mhartid
bne \reg1, \reg2, 2f // not the non-SMP hart: go wait at local label 2
.endm

// Wake all harts and wait until every one of them has acknowledged.
// The hart that falls into this macro from above writes 1 to the 32-bit word
// of each of the NUM_INT_HARTS harts at __base_clint (its own included).
// Harts arriving at local label 2 (e.g. from bootrom_smp_pause) sleep until
// their software interrupt is pending. Every hart then clears its own word and
// spins until all per-hart words read zero.
//   reg1, reg2, reg3: scratch registers, all overwritten.
.macro bootrom_smp_resume reg1, reg2, reg3
la \reg1, __base_clint
la \reg3, __base_regs
lw \reg3, 76(\reg3) // regs.NUM_INT_HARTS
slli \reg3, \reg3, 2 // one 32-bit word per hart
add \reg3, \reg1, \reg3 // reg3 = address just past the last hart's word
1: // Write 1 to the word of every hart
li \reg2, 1
sw \reg2, 0(\reg1)
addi \reg1, \reg1, 4
blt \reg1, \reg3, 1b
2: // Sleep until bit 3 of mip (software interrupt pending) is set
wfi
csrr \reg2, mip
andi \reg2, \reg2, 0x8
beqz \reg2, 2b
// Clear this hart's own word: __base_clint + 4 * mhartid
la \reg1, __base_clint
csrr \reg2, mhartid
slli \reg2, \reg2, 2
add \reg2, \reg2, \reg1
sw zero, 0(\reg2)
// Recompute the end address, since reg3 is not valid for harts that entered at label 2
la \reg3, __base_regs
lw \reg3, 76(\reg3) // regs.NUM_INT_HARTS
slli \reg3, \reg3, 2
add \reg3, \reg1, \reg3
3: // Spin on each word in turn until it reads zero, then move to the next
lw \reg2, 0(\reg1)
bnez \reg2, 3b
addi \reg1, \reg1, 4
blt \reg1, \reg3, 3b
.endm

.section .text._start

Expand Down Expand Up @@ -47,7 +91,7 @@ _start:
li x31, 0

// Pause SMP harts
smp_pause(t0, t1)
bootrom_smp_pause t0, t1

// Init stack and global pointer with safe, linked values
la sp, __stack_pointer$
Expand Down Expand Up @@ -94,7 +138,7 @@ boot_next_stage:
sw a0, 20(t0) // regs.SCRATCH[5]
fence
// Resume SMP harts
smp_resume(t0, t1, t2)
bootrom_smp_resume t0, t1, t2
// Load boot address from global scratch registers
la t0, __base_regs
lwu t1, 20(t0) // regs.SCRATCH[5]
Expand Down
6 changes: 4 additions & 2 deletions sw/boot/zsl.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "gpt.h"
#include "dif/uart.h"
#include "printf.h"
#include "smp.h"

// Type for firmware payload
typedef int (*payload_t)(uint64_t, uint64_t, uint64_t);
Expand Down Expand Up @@ -80,9 +81,10 @@ int main(void) {
load_part_or_spin(priv, __BOOT_FW_TYPE_GUID, __BOOT_ZSL_FW, "firmware", 8192);
}

// Launch payload
// Resume SMP and launch payload
printf("[ZSL] Launch firmware at %lx with device tree at %lx\r\n", __BOOT_ZSL_FW, __BOOT_ZSL_DTB);
smp_resume();
payload_t fw = __BOOT_ZSL_FW;
printf("[ZSL] Launch firmware at %lx with device tree at %lx\r\n", fw, __BOOT_ZSL_DTB);
fencei();
return fw(0, (uintptr_t)__BOOT_ZSL_DTB, 0);
}
Expand Down
62 changes: 20 additions & 42 deletions sw/include/smp.h
Original file line number Diff line number Diff line change
@@ -1,49 +1,27 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Copyright 2022 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Emanuele Parisi <emanuele.parisi@unibo.it>

#pragma once

// The hart that non-SMP tests should run on
#ifndef NONSMP_HART
#define NONSMP_HART 0
#endif
#include <stdint.h>
#include <stdbool.h>

// Let the non-SMP hart (NONSMP_HART) continue and make all other harts branch
// forward to the next local label `2`, which the smp_resume macro defines
// (there they loop on wfi). The expansion is assembly: it writes 0x8 to mie
// and compares mhartid against NONSMP_HART.
//   reg1, reg2: scratch registers, both overwritten.
#define smp_pause(reg1, reg2) \
li reg2, 0x8; \
csrw mie, reg2; \
li reg1, NONSMP_HART; \
csrr reg2, mhartid; \
bne reg1, reg2, 2f
#include "util.h"
#include "regs/cheshire.h"
#include "params.h"

// Wake all harts and wait until every one of them has acknowledged.
// Expands to assembly with three phases:
//   label 1: write 1 to the 32-bit word of each of the NUM_INT_HARTS harts,
//            starting at __base_clint;
//   label 2: wfi until bit 3 of mip is set, then clear this hart's own word
//            at __base_clint + 4 * mhartid (harts branching here from the
//            smp_pause macro enter at this point);
//   label 3: spin on each per-hart word in turn until all of them read zero.
//   reg1, reg2, reg3: scratch registers, all overwritten.
#define smp_resume(reg1, reg2, reg3) \
la reg1, __base_clint; \
la reg3, __base_regs; \
lw reg3, 76(reg3); /* regs.NUM_INT_HARTS */ \
slli reg3, reg3, 2; \
add reg3, reg1, reg3; \
1:; \
li reg2, 1; \
sw reg2, 0(reg1); \
addi reg1, reg1, 4; \
blt reg1, reg3, 1b; \
2:; \
wfi; \
csrr reg2, mip; \
andi reg2, reg2, 0x8; \
beqz reg2, 2b; \
la reg1, __base_clint; \
csrr reg2, mhartid; \
slli reg2, reg2, 2; \
add reg2, reg2, reg1; \
sw zero, 0(reg2); \
la reg3, __base_regs; \
lw reg3, 76(reg3); /* regs.NUM_INT_HARTS */ \
slli reg3, reg3, 2; \
add reg3, reg1, reg3; \
3:; \
lw reg2, 0(reg1); \
bnez reg2, 3b; \
addi reg1, reg1, 4; \
blt reg1, reg3, 3b
/*
* Pause all harts except for hart 0 until an IPI is received. On wake-up every
* core resumes execution from the address stored in SCRATCH[4:5] registers.
*/
void smp_pause(void);

/*
* Resume execution in all harts. This function sets SCRATCH[4:5] registers and
* sends an IPI to all harts except for hart 0. The execution resumes from the
* last instruction of this function.
*/
void smp_resume(void);
34 changes: 34 additions & 0 deletions sw/include/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ static inline void wfi() {
asm volatile("wfi" ::: "memory");
}

// Executes a single `nop` instruction. The "memory" clobber additionally makes
// the statement a compiler-level barrier for memory accesses.
static inline void nop() {
asm volatile("nop" ::: "memory");
}

// Enables or disables M-mode timer interrupts.
static inline void set_mtie(int enable) {
if (enable)
Expand All @@ -38,6 +42,29 @@ static inline void set_mtie(int enable) {
asm volatile("csrc mie, %0" ::"r"(128) : "memory");
}

// Sets (enable != 0) or clears (enable == 0) the M-mode software interrupt
// enable bit, i.e. bit 3 (value 8) of the mie CSR. Other mie bits are untouched.
static inline void set_msie(int enable) {
    const int msie_mask = 8;

    if (!enable) {
        asm volatile("csrc mie, %0" ::"r"(msie_mask) : "memory");
        return;
    }
    asm volatile("csrs mie, %0" ::"r"(msie_mask) : "memory");
}

// Sets (enable != 0) or clears (enable == 0) the M-mode software interrupt
// pending bit, i.e. bit 3 (value 8) of the mip CSR.
// NOTE(review): the RISC-V privileged spec describes mip.MSIP as read-only from
// the CSR side (it is driven by a memory-mapped interrupt-controller register);
// confirm whether this write has any effect on the target core.
static inline void set_msip(int enable) {
    const int msip_mask = 8;

    if (!enable) {
        asm volatile("csrc mip, %0" ::"r"(msip_mask) : "memory");
        return;
    }
    asm volatile("csrs mip, %0" ::"r"(msip_mask) : "memory");
}

// Reads the mip CSR and returns its M-mode software interrupt pending bit
// (bit 3) as 0 or 1.
static inline uint64_t get_msip() {
    uint64_t mip_value;

    asm volatile("csrr %0, mip" : "=r"(mip_value)::"memory");

    // Shift bit 3 down to bit 0 and drop everything else.
    return (mip_value >> 3) & 0x1;
}

// Enables or disables M-mode global interrupts.
static inline void set_mie(int enable) {
if (enable)
Expand All @@ -46,6 +73,13 @@ static inline void set_mie(int enable) {
asm volatile("csrci mstatus, 8" ::: "memory");
}

// Returns the value of the mhartid CSR, i.e. the ID of the hart executing
// this code.
static inline uint64_t get_mhartid() {
    uint64_t hart_id;

    asm volatile("csrr %0, mhartid" : "=r"(hart_id)::"memory");

    return hart_id;
}

// Get cycle count since reset
static inline uint64_t get_mcycle() {
uint64_t mcycle;
Expand Down
91 changes: 49 additions & 42 deletions sw/lib/crt0.S
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
// Nicole Narr <narrn@student.ethz.ch>
// Christopher Reinwardt <creinwar@student.ethz.ch>
// Paul Scheffler <paulsc@iis.ee.ethz.ch>
// Emanuele Parisi <emanuele.parisi@unibo.it>

.section .text._start

Expand All @@ -14,60 +15,36 @@ _start:
// Globally disable Machine and Supervisor interrupts
csrrc x0, mstatus, 10

// Park SMP harts
csrr t0, mhartid
beqz t0, 2f
1:
wfi
j 1b
2:
// Init stack and global pointer iff linked as nonzero
mv t1, sp
la t0, __stack_pointer$
beqz t0, 1f
mv sp, t0
1: .option push
_init_gp:
// Init global pointer iff linked as nonzero
.option push
.option norelax
la t0, __global_pointer$
beqz t0, 1f
beqz t0, _init_sp
mv gp, t0
1: .option pop

.option pop

_init_sp:
// Init stack pointer iff linked as nonzero
mv t0, sp
la t1, __stack_pointer$
beqz t1, _init_context
la t2, __stack_size$
csrr t3, mhartid
mul t3, t3, t2
sub sp, t1, t3

_init_context:
// Store existing stack, global, return pointers on new stack
addi sp, sp, -24
sd t1, 0(sp)
sd t0, 0(sp)
sd gp, 8(sp)
sd ra, 16(sp)

// Set trap vector
la t0, _trap_handler_wrap
csrrw x0, mtvec, t0

// Zero the .bss section
la t0, __bss_start // t0 = bss start address
la t1, __bss_end // t1 = bss end address
sub t2, t1, t0 // t2 = #bytes to zero
li a0, 0

_zero_bss_loop:
addi t4, t2, -32
blez t2, _fp_init // t2 <= 0? => No bss to zero
blt t4, x0, _zero_bss_rem // t4 < 0? => Less than 4 words left
sd a0, 0(t0)
sd a0, 8(t0)
sd a0, 16(t0)
sd a0, 24(t0)
addi t2, t2, -32
addi t0, t0, 32
bgt t2, x0, _zero_bss_loop // Still more to go
j _fp_init

_zero_bss_rem:
sb a0, 0(t0)
addi t2, t2, -1
addi t0, t0, 1
bgt t2, x0, _zero_bss_rem

_fp_init:
// Set FS state to "Initial", enabling FP instructions
li t1, 1
Expand Down Expand Up @@ -111,6 +88,36 @@ _fp_init:
// Set FS state to "Clean"
csrrc x0, mstatus, t1

_smp_park:
call smp_pause

_zero_bss_init:
// Zero the .bss section
la t0, __bss_start // t0 = bss start address
la t1, __bss_end // t1 = bss end address
sub t2, t1, t0 // t2 = #bytes to zero
li a0, 0

_zero_bss_loop:
addi t4, t2, -32
blez t2, _entry // t2 <= 0? => No bss to zero
blt t4, x0, _zero_bss_rem // t4 < 0? => Less than 4 words left
sd a0, 0(t0)
sd a0, 8(t0)
sd a0, 16(t0)
sd a0, 24(t0)
addi t2, t2, -32
addi t0, t0, 32
bgt t2, x0, _zero_bss_loop // Still more to go
j _entry

_zero_bss_rem:
sb a0, 0(t0)
addi t2, t2, -1
addi t0, t0, 1
bgt t2, x0, _zero_bss_rem

_entry:
// Full fence, then jump to main
fence
call main
Expand Down
56 changes: 56 additions & 0 deletions sw/lib/smp.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2022 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Emanuele Parisi <emanuele.parisi@unibo.it>

#include "smp.h"

// Parks the calling hart unless it is hart 0.
//
// Hart 0 returns right after the initial fence. Any other hart enables M-mode
// software interrupts, sleeps in wfi until its MSIP bit is observed set, then
// acknowledges the wake-up by clearing its word at __base_clint + 4 * mhartid.
// It finally reads a 64-bit address from SCRATCH[4] (low half) and SCRATCH[5]
// (high half) and hands it to invoke(). invoke() is defined elsewhere; per the
// original comment it flushes the i-cache and jumps.
void smp_pause(void) {
    const uint64_t hart = get_mhartid();

    fence();

    // Hart 0 is never parked.
    if (hart == 0x0) {
        return;
    }

    // Enable M-mode software interrupts so an IPI can end the wfi below.
    set_msie(true);

    // wfi may return spuriously: keep sleeping until MSIP is really pending.
    for (;;) {
        wfi();
        if (get_msip()) {
            break;
        }
    }

    // Acknowledge: clear MSIP and this hart's IPI register in the CLINT.
    set_msip(false);
    *reg32(&__base_clint, hart << 2) = 0x0;

    // Assemble the jump target from SCRATCH[4] (low) and SCRATCH[5] (high).
    uint64_t target = (uint64_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_4_REG_OFFSET);
    uint64_t target_hi = (uint64_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_5_REG_OFFSET);
    target |= target_hi << 32;

    // Flush i-cache and jump.
    invoke((void*)target);
}

// Wakes every hart other than hart 0 and points it at the end of this function.
//
// The address of the local label Lsmp_resume_target is split into two 32-bit
// halves and stored in SCRATCH[4] (low) and SCRATCH[5] (high), which is where
// smp_pause() reads its jump target from. Each hart 1..NUM_INT_HARTS-1 is then
// sent an IPI through its word at __base_clint + 4 * i, one hart at a time.
//
// NOTE(review): `&&label` is the GCC labels-as-values extension, and GCC's
// documentation says a label address must not be used to jump to code in a
// different function. Woken harts reach the label from smp_pause(), so confirm
// this is safe with the toolchain and optimization level in use.
void smp_resume(void) {
uint64_t resume_addr = (uint64_t)(&&Lsmp_resume_target);
uint32_t resume_addr_lo = resume_addr & 0xffffffff;
uint32_t resume_addr_hi = resume_addr >> 32;
uint32_t num_harts = *reg32(&__base_regs, CHESHIRE_NUM_INT_HARTS_REG_OFFSET);

// Write resume address in SCRATCH[4:5].
*reg32(&__base_regs, CHESHIRE_SCRATCH_4_REG_OFFSET) = resume_addr_lo;
*reg32(&__base_regs, CHESHIRE_SCRATCH_5_REG_OFFSET) = resume_addr_hi;

// Flush cache and wake-up all sleeping cores.
// The fence comes before the first IPI so the SCRATCH writes above are
// ordered ahead of any wake-up.
fence();
for (uint32_t i=1; i<num_harts; i++) {
// Raise the IPI for hart i, then spin until its word reads zero again
// (smp_pause() clears it on wake-up).
*reg32(&__base_clint, i << 2) = 0x1;
while (*reg32(&__base_clint, i << 2));
}

// Resume target stored in SCRATCH[4:5]; hart 0 simply falls through to here.
Lsmp_resume_target:
nop();
}
Loading