diff --git a/hw/bootrom/cheshire_bootrom.S b/hw/bootrom/cheshire_bootrom.S index 22bbeb9f..38cc2e4d 100644 --- a/hw/bootrom/cheshire_bootrom.S +++ b/hw/bootrom/cheshire_bootrom.S @@ -8,7 +8,51 @@ // TODO: Avoid hardcoding in addresses and offsets -#include "smp.h" +// The hart that non-SMP tests should run on +#ifndef NONSMP_HART +#define NONSMP_HART 0 +#endif + +// Let non-SMP hart continue and all other harts jump (and loop) in smp_resume +.macro bootrom_smp_pause reg1, reg2 + li \reg2, 0x8 + csrw mie, \reg2 + li \reg1, NONSMP_HART + csrr \reg2, mhartid + bne \reg1, \reg2, 2f +.endm + +.macro bootrom_smp_resume reg1, reg2, reg3 + la \reg1, __base_clint + la \reg3, __base_regs + lw \reg3, 76(\reg3) // regs.NUM_INT_HARTS + slli \reg3, \reg3, 2 + add \reg3, \reg1, \reg3 +1: + li \reg2, 1 + sw \reg2, 0(\reg1) + addi \reg1, \reg1, 4 + blt \reg1, \reg3, 1b +2: + wfi + csrr \reg2, mip + andi \reg2, \reg2, 0x8 + beqz \reg2, 2b + la \reg1, __base_clint + csrr \reg2, mhartid + slli \reg2, \reg2, 2 + add \reg2, \reg2, \reg1 + sw zero, 0(\reg2) + la \reg3, __base_regs + lw \reg3, 76(\reg3) // regs.NUM_INT_HARTS + slli \reg3, \reg3, 2 + add \reg3, \reg1, \reg3 +3: + lw \reg2, 0(\reg1) + bnez \reg2, 3b + addi \reg1, \reg1, 4 + blt \reg1, \reg3, 3b +.endm .section .text._start @@ -47,7 +91,7 @@ _start: li x31, 0 // Pause SMP harts - smp_pause(t0, t1) + bootrom_smp_pause t0, t1 // Init stack and global pointer with safe, linked values la sp, __stack_pointer$ @@ -94,7 +138,7 @@ boot_next_stage: sw a0, 20(t0) // regs.SCRATCH[5] fence // Resume SMP harts - smp_resume(t0, t1, t2) + bootrom_smp_resume t0, t1, t2 // Load boot address from global scratch registers la t0, __base_regs lwu t1, 20(t0) // regs.SCRATCH[5] diff --git a/sw/boot/zsl.c b/sw/boot/zsl.c index ea85852a..de505913 100644 --- a/sw/boot/zsl.c +++ b/sw/boot/zsl.c @@ -14,6 +14,7 @@ #include "gpt.h" #include "dif/uart.h" #include "printf.h" +#include "smp.h" // Type for firmware payload typedef int (*payload_t)(uint64_t, 
uint64_t, uint64_t); @@ -80,9 +81,10 @@ int main(void) { load_part_or_spin(priv, __BOOT_FW_TYPE_GUID, __BOOT_ZSL_FW, "firmware", 8192); } - // Launch payload + // Resume SMP and launch payload + printf("[ZSL] Launch firmware at %lx with device tree at %lx\r\n", __BOOT_ZSL_FW, __BOOT_ZSL_DTB); + smp_resume(); payload_t fw = __BOOT_ZSL_FW; - printf("[ZSL] Launch firmware at %lx with device tree at %lx\r\n", fw, __BOOT_ZSL_DTB); fencei(); return fw(0, (uintptr_t)__BOOT_ZSL_DTB, 0); } diff --git a/sw/include/smp.h b/sw/include/smp.h index d13d8757..14e784cb 100644 --- a/sw/include/smp.h +++ b/sw/include/smp.h @@ -1,49 +1,27 @@ -// Copyright 2023 ETH Zurich and University of Bologna. +// Copyright 2022 ETH Zurich and University of Bologna. // Licensed under the Apache License, Version 2.0, see LICENSE for details. // SPDX-License-Identifier: Apache-2.0 +// +// Emanuele Parisi #pragma once -// The hart that non-SMP tests should run on -#ifndef NONSMP_HART -#define NONSMP_HART 0 -#endif +#include +#include -// Let non-SMP hart continue and all other harts jump (and loop) in smp_resume -#define smp_pause(reg1, reg2) \ - li reg2, 0x8; \ - csrw mie, reg2; \ - li reg1, NONSMP_HART; \ - csrr reg2, mhartid; \ - bne reg1, reg2, 2f +#include "util.h" +#include "regs/cheshire.h" +#include "params.h" -#define smp_resume(reg1, reg2, reg3) \ - la reg1, __base_clint; \ - la reg3, __base_regs; \ - lw reg3, 76(reg3); /* regs.NUM_INT_HARTS */ \ - slli reg3, reg3, 2; \ - add reg3, reg1, reg3; \ - 1:; \ - li reg2, 1; \ - sw reg2, 0(reg1); \ - addi reg1, reg1, 4; \ - blt reg1, reg3, 1b; \ - 2:; \ - wfi; \ - csrr reg2, mip; \ - andi reg2, reg2, 0x8; \ - beqz reg2, 2b; \ - la reg1, __base_clint; \ - csrr reg2, mhartid; \ - slli reg2, reg2, 2; \ - add reg2, reg2, reg1; \ - sw zero, 0(reg2); \ - la reg3, __base_regs; \ - lw reg3, 76(reg3); /* regs.NUM_INT_HARTS */ \ - slli reg3, reg3, 2; \ - add reg3, reg1, reg3; \ - 3:; \ - lw reg2, 0(reg1); \ - bnez reg2, 3b; \ - addi reg1, reg1, 4; \ - blt 
reg1, reg3, 3b +/* + * Pause all harts except for hart 0 until an IPI is received. On wake-up every + * core resumes execution from the address stored in SCRATCH[4:5] registers. + */ +void smp_pause(void); + +/* + * Resume execution in all harts. This function sets SCRATCH[4:5] registers and + * sends an IPI to all harts except for hart 0. The execution resumes from the + * last instruction of this function. + */ +void smp_resume(void); diff --git a/sw/include/util.h b/sw/include/util.h index 46dcedb2..b4b9eae0 100644 --- a/sw/include/util.h +++ b/sw/include/util.h @@ -30,6 +30,10 @@ static inline void wfi() { asm volatile("wfi" ::: "memory"); } +static inline void nop() { + asm volatile("nop" ::: "memory"); +} + // Enables or disables M-mode timer interrupts. static inline void set_mtie(int enable) { if (enable) @@ -38,6 +42,29 @@ static inline void set_mtie(int enable) { asm volatile("csrc mie, %0" ::"r"(128) : "memory"); } +// Enables or disables M-mode software interrupts. +static inline void set_msie(int enable) { + if (enable) + asm volatile("csrs mie, %0" ::"r"(8) : "memory"); + else + asm volatile("csrc mie, %0" ::"r"(8) : "memory"); +} + +// Sets or clears the M-mode software interrupt pending bit (bit 3 of mip). +static inline void set_msip(int enable) { + if (enable) + asm volatile("csrs mip, %0" ::"r"(8) : "memory"); + else + asm volatile("csrc mip, %0" ::"r"(8) : "memory"); +} + +// Returns the M-mode software interrupt pending bit (bit 3 of mip) as 0 or 1. +static inline uint64_t get_msip() { + uint64_t msip; + asm volatile("csrr %0, mip" : "=r"(msip)::"memory"); + return (msip & 0x8) >> 3; +} + // Enables or disables M-mode global interrupts. 
static inline void set_mie(int enable) { if (enable) @@ -46,6 +73,13 @@ static inline void set_mie(int enable) { asm volatile("csrci mstatus, 8" ::: "memory"); } +// Get hart id +static inline uint64_t get_mhartid() { + uint64_t mhartid; + asm volatile("csrr %0, mhartid" : "=r"(mhartid)::"memory"); + return mhartid; +} + // Get cycle count since reset static inline uint64_t get_mcycle() { uint64_t mcycle; diff --git a/sw/lib/crt0.S b/sw/lib/crt0.S index ebf372ad..62f9036c 100644 --- a/sw/lib/crt0.S +++ b/sw/lib/crt0.S @@ -5,6 +5,7 @@ // Nicole Narr // Christopher Reinwardt // Paul Scheffler +// Emanuele Parisi .section .text._start @@ -14,28 +15,29 @@ _start: // Globally disable Machine and Supervisor interrupts csrrc x0, mstatus, 10 - // Park SMP harts - csrr t0, mhartid - beqz t0, 2f -1: - wfi - j 1b -2: - // Init stack and global pointer iff linked as nonzero - mv t1, sp - la t0, __stack_pointer$ - beqz t0, 1f - mv sp, t0 -1: .option push +_init_gp: + // Init global pointer iff linked as nonzero + .option push .option norelax la t0, __global_pointer$ - beqz t0, 1f + beqz t0, _init_sp mv gp, t0 -1: .option pop - + .option pop + +_init_sp: + // Init stack pointer iff linked as nonzero + mv t0, sp + la t1, __stack_pointer$ + beqz t1, _init_context + la t2, __stack_size$ + csrr t3, mhartid + mul t3, t3, t2 + sub sp, t1, t3 + +_init_context: // Store existing stack, global, return pointers on new stack addi sp, sp, -24 - sd t1, 0(sp) + sd t0, 0(sp) sd gp, 8(sp) sd ra, 16(sp) @@ -43,31 +45,6 @@ _start: la t0, _trap_handler_wrap csrrw x0, mtvec, t0 - // Zero the .bss section - la t0, __bss_start // t0 = bss start address - la t1, __bss_end // t1 = bss end address - sub t2, t1, t0 // t2 = #bytes to zero - li a0, 0 - -_zero_bss_loop: - addi t4, t2, -32 - blez t2, _fp_init // t2 <= 0? => No bss to zero - blt t4, x0, _zero_bss_rem // t4 < 0? 
=> Less than 4 words left - sd a0, 0(t0) - sd a0, 8(t0) - sd a0, 16(t0) - sd a0, 24(t0) - addi t2, t2, -32 - addi t0, t0, 32 - bgt t2, x0, _zero_bss_loop // Still more to go - j _fp_init - -_zero_bss_rem: - sb a0, 0(t0) - addi t2, t2, -1 - addi t0, t0, 1 - bgt t2, x0, _zero_bss_rem - _fp_init: // Set FS state to "Initial", enabling FP instructions li t1, 1 @@ -111,6 +88,36 @@ _fp_init: // Set FS state to "Clean" csrrc x0, mstatus, t1 +_smp_park: + call smp_pause + +_zero_bss_init: + // Zero the .bss section + la t0, __bss_start // t0 = bss start address + la t1, __bss_end // t1 = bss end address + sub t2, t1, t0 // t2 = #bytes to zero + li a0, 0 + +_zero_bss_loop: + addi t4, t2, -32 + blez t2, _entry // t2 <= 0? => No bss to zero + blt t4, x0, _zero_bss_rem // t4 < 0? => Less than 4 words left + sd a0, 0(t0) + sd a0, 8(t0) + sd a0, 16(t0) + sd a0, 24(t0) + addi t2, t2, -32 + addi t0, t0, 32 + bgt t2, x0, _zero_bss_loop // Still more to go + j _entry + +_zero_bss_rem: + sb a0, 0(t0) + addi t2, t2, -1 + addi t0, t0, 1 + bgt t2, x0, _zero_bss_rem + +_entry: // Full fence, then jump to main fence call main diff --git a/sw/lib/smp.c b/sw/lib/smp.c new file mode 100644 index 00000000..a0e69ac5 --- /dev/null +++ b/sw/lib/smp.c @@ -0,0 +1,56 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Emanuele Parisi + +#include "smp.h" + +void smp_pause(void) { + uint64_t mhartid = get_mhartid(); + uint64_t next_addr_lo = 0x0; + uint64_t next_addr_hi = 0x0; + + fence(); + if (mhartid != 0x0) { + // Enable M-mode software interrupts. + set_msie(true); + + // Remain in WFI until the MSIP bit is set and clear it on wake-up. + do { + wfi(); + } while (!get_msip()); + + // Clear MSIP bit and appropriate IPI register in the CLINT. + set_msip(false); + *reg32(&__base_clint, mhartid << 2) = 0x0; + + // Read jump address from SCRATCH[4:5] registers. 
+ next_addr_lo = (uint64_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_4_REG_OFFSET); + next_addr_hi = (uint64_t)*reg32(&__base_regs, CHESHIRE_SCRATCH_5_REG_OFFSET); + + // Flush i-cache and jump. + invoke((void*)(next_addr_lo | (next_addr_hi << 32))); + } +} + +void smp_resume(void) { + uint64_t resume_addr = (uint64_t)(&&Lsmp_resume_target); + uint32_t resume_addr_lo = resume_addr & 0xffffffff; + uint32_t resume_addr_hi = resume_addr >> 32; + uint32_t num_harts = *reg32(&__base_regs, CHESHIRE_NUM_INT_HARTS_REG_OFFSET); + + // Write resume address in SCRATCH[4:5]. + *reg32(&__base_regs, CHESHIRE_SCRATCH_4_REG_OFFSET) = resume_addr_lo; + *reg32(&__base_regs, CHESHIRE_SCRATCH_5_REG_OFFSET) = resume_addr_hi; + + // Flush cache and wake-up all sleeping cores. + fence(); + for (uint32_t i=1; i +// Christopher Reinwardt +// Emanuele Parisi +// +// Simple SMP Hello World. + +#include "regs/cheshire.h" +#include "dif/clint.h" +#include "dif/uart.h" +#include "params.h" +#include "util.h" +#include "smp.h" +#include "printf.h" + +uint32_t __attribute__((section(".data"))) semaphore = 0x0; + +void semaphore_wait() { + asm volatile ( + " li t0, 1 \n" + "1: \n" + " amoswap.w.aq t0, t0, (%0) \n" + " bnez t0, 1b \n" + ::"r"(&semaphore) + ); +} + +void semaphore_post() { + asm volatile ( + " amoswap.w.rl zero, zero, (%0) \n" + ::"r"(&semaphore) + ); +} + +int main(void) { + uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); + uint64_t reset_freq = clint_get_core_freq(rtc_freq, 2500); + uart_init(&__base_uart, reset_freq, 115200); + + smp_resume(); + + for (uint64_t i=0; i<10; i++) { + semaphore_wait(); + printf("Hello World! - (%d, %d)\r\n", get_mhartid(), i); + semaphore_post(); + } + + return 0; +}