seL4 · andybui01 · Nov 9, 2023 · Nov 10, 2023 · Nov 9, 2023 · Nov 16, 2023
diff --git a/CHANGES b/CHANGES
@@ -32,6 +32,7 @@ Upcoming release: BREAKING
 * Added support for arm_hyp on qemu-arm-virt platfrom with cortex-a15 CPU
 * Added support for qemu-riscv-virt
 * Added support for the Pine64 Star64
+* Added support for the NVIDIA Jetson Orin
 * Rename libsel4 config option ENABLE_SMP_SUPPORT to CONFIG_ENABLE_SMP_SUPPORT to be namespace compliant.
 * Rename libsel4 config option AARCH64_VSPACE_S2_START_L1 to CONFIG_AARCH64_VSPACE_S2_START_L1 to be namespace
   compliant.

diff --git a/configs/seL4Config.cmake b/configs/seL4Config.cmake
@@ -147,6 +147,7 @@ foreach(
     KernelArmCortexA55
     KernelArmCortexA57
     KernelArmCortexA72
+    KernelArmCortexA78AE
     KernelArchArmV7a
     KernelArchArmV7ve
     KernelArchArmV8a
@@ -194,6 +195,7 @@ config_set(KernelArmCortexA53 ARM_CORTEX_A53 "${KernelArmCortexA53}")
 config_set(KernelArmCortexA55 ARM_CORTEX_A55 "${KernelArmCortexA55}")
 config_set(KernelArmCortexA57 ARM_CORTEX_A57 "${KernelArmCortexA57}")
 config_set(KernelArmCortexA72 ARM_CORTEX_A72 "${KernelArmCortexA72}")
+config_set(KernelArmCortexA78AE ARM_CORTEX_A78AE "${KernelArmCortexA78AE}")
 config_set(KernelArchArmV7a ARCH_ARM_V7A "${KernelArchArmV7a}")
 config_set(KernelArchArmV7ve ARCH_ARM_V7VE "${KernelArchArmV7ve}")
 config_set(KernelArchArmV8a ARCH_ARM_V8A "${KernelArchArmV8a}")
@@ -227,6 +229,8 @@ elseif(KernelArmCortexA57)
     set(KernelArmCPU "cortex-a57" CACHE INTERNAL "")
 elseif(KernelArmCortexA72)
     set(KernelArmCPU "cortex-a72" CACHE INTERNAL "")
+elseif(KernelArmCortexA78AE)
+    set(KernelArmCPU "cortex-a78ae" CACHE INTERNAL "")
 endif()
 if(KernelArchARM)
     config_set(KernelArmMach ARM_MACH "${KernelArmMach}")

diff --git a/include/arch/arm/arch/machine/gic_v3.h b/include/arch/arm/arch/machine/gic_v3.h
@@ -49,6 +49,29 @@
 
 #define DEFAULT_PMR_VALUE            0xff
 
+/* ICC_SGI1R (Software Generated Interrupt Group 1 Register) fields: shift, 64-bit mask and extractor for each. */
+#define ICC_SGI1R_TARGET_LIST_SHIFT         0
+#define ICC_SGI1R_TARGET_LIST_MASK          (0xffffull << ICC_SGI1R_TARGET_LIST_SHIFT)
+#define ICC_SGI1R_TARGET_LIST_VAL(sgi1r)    (((sgi1r) & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT)
+#define ICC_SGI1R_AFF1_SHIFT            16
+#define ICC_SGI1R_AFF1_MASK             (0xffull << ICC_SGI1R_AFF1_SHIFT)
+#define ICC_SGI1R_AFF1_VAL(sgi1r)       (((sgi1r) & ICC_SGI1R_AFF1_MASK) >> ICC_SGI1R_AFF1_SHIFT)
+#define ICC_SGI1R_INT_ID_SHIFT          24
+#define ICC_SGI1R_INT_ID_MASK           (0xfull << ICC_SGI1R_INT_ID_SHIFT)
+#define ICC_SGI1R_INT_ID_VAL(sgi1r)     (((sgi1r) & ICC_SGI1R_INT_ID_MASK) >> ICC_SGI1R_INT_ID_SHIFT)
+#define ICC_SGI1R_AFF2_SHIFT            32
+#define ICC_SGI1R_AFF2_MASK             (0xffull << ICC_SGI1R_AFF2_SHIFT)
+#define ICC_SGI1R_AFF2_VAL(sgi1r)       (((sgi1r) & ICC_SGI1R_AFF2_MASK) >> ICC_SGI1R_AFF2_SHIFT)
+#define ICC_SGI1R_IRM_SHIFT             40
+#define ICC_SGI1R_IRM_MASK              (0x1ull << ICC_SGI1R_IRM_SHIFT)
+#define ICC_SGI1R_IRM_VAL(sgi1r)        (((sgi1r) & ICC_SGI1R_IRM_MASK) >> ICC_SGI1R_IRM_SHIFT)
+#define ICC_SGI1R_RS_SHIFT              44
+#define ICC_SGI1R_RS_MASK               (0xfull << ICC_SGI1R_RS_SHIFT)
+#define ICC_SGI1R_RS_VAL(sgi1r)         (((sgi1r) & ICC_SGI1R_RS_MASK) >> ICC_SGI1R_RS_SHIFT)
+#define ICC_SGI1R_AFF3_SHIFT            48
+#define ICC_SGI1R_AFF3_MASK             (0xffull << ICC_SGI1R_AFF3_SHIFT)
+#define ICC_SGI1R_AFF3_VAL(sgi1r)       (((sgi1r) & ICC_SGI1R_AFF3_MASK) >> ICC_SGI1R_AFF3_SHIFT)
+
 /* System registers for GIC CPU interface */
 #ifdef CONFIG_ARCH_AARCH64
 #define ICC_IAR1_EL1    "S3_0_C12_C12_0"

diff --git a/include/arch/arm/arch/model/smp.h b/include/arch/arm/arch/model/smp.h
@@ -13,7 +13,11 @@
 #ifdef ENABLE_SMP_SUPPORT
 static inline cpu_id_t cpuIndexToID(word_t index)
 {
+#ifdef CONFIG_ARM_GIC_V3_SUPPORT
+    return index; /* GICv3: the ID is the core index itself; ipi_send_target() uses it to index mpidr_map */
+#else /* !CONFIG_ARM_GIC_V3_SUPPORT: the ID is a mask with only bit 'index' set */
     return BIT(index);
+#endif /* CONFIG_ARM_GIC_V3_SUPPORT */
 }
 
 static inline bool_t try_arch_atomic_exchange_rlx(void *ptr, void *new_val, void **prev)

diff --git a/libsel4/arch_include/arm/sel4/arch/constants_cortex_a78ae.h b/libsel4/arch_include/arm/sel4/arch/constants_cortex_a78ae.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023, NIO
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <sel4/config.h>
+
+#if !defined(CONFIG_ARM_CORTEX_A78AE)
+#error CONFIG_ARM_CORTEX_A78AE is not defined
+#endif
+
+/* Cortex-A78AE TRM, Section 5.3 */
+#define seL4_NumHWBreakpoints           10 /* equals the sum of the two exclusive counts below (6 + 4) */
+#define seL4_NumExclusiveBreakpoints    6
+#define seL4_NumExclusiveWatchpoints    4
+
+#ifdef CONFIG_HARDWARE_DEBUG_API
+
+#define seL4_FirstBreakpoint            0
+#define seL4_FirstWatchpoint            6 /* watchpoint numbering starts right after the 6 breakpoints */
+
+#define seL4_NumDualFunctionMonitors    0
+#define seL4_FirstDualFunctionMonitor   (-1) /* count above is 0, so -1 marks "no first index" */
+
+#endif /* CONFIG_HARDWARE_DEBUG_API */
diff --git a/libsel4/sel4_plat_include/orin/sel4/plat/api/constants.h b/libsel4/sel4_plat_include/orin/sel4/plat/api/constants.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright 2023, NIO
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <sel4/config.h>
+
+#if defined(CONFIG_ARM_CORTEX_A78AE)
+#include <sel4/arch/constants_cortex_a78ae.h>
+#else
+#error "unsupported core"
+#endif
diff --git a/src/arch/arm/64/kernel/vspace.c b/src/arch/arm/64/kernel/vspace.c
@@ -68,12 +68,20 @@ enum mair_s2_types {
     S2_NORMAL = S2_NORMAL_INNER_WBC_OUTER_WBC
 };
 
-/* Leif from Linaro said the big.LITTLE clusters should be treated as
- * inner shareable, and we believe so, although the Example B2-1 given in
- * ARM ARM DDI 0487B.b (ID092517) says otherwise.
+/* ARM DDI 0487J.a, section D8.5.2 */
+/* Linux will only run on cores within the same Inner Shareable domain.
+ * They make an assumption that all the PEs are under a single IS domain,
+ * and we do the same here.
+ *
+ * Arm also states "Arm expects operating systems to mark the
+ * majority of DRAM memory as Normal Write-back cacheable, Inner
+ * shareable" (102376_0200_01_en version 2).
+ *
+ * Note: according to RPYFVQ (ARM ARM DDI 0487J.a), the shareability
+ * attribute for Device memory does not matter as it will instead
+ * be treated as Outer Shareable.
  */
-
-#define SMP_SHARE   3
+#define INNER_SHAREABLE     3
 
 struct lookupPTSlot_ret {
     pte_t *ptSlot;
@@ -184,13 +192,13 @@ BOOT_CODE void map_kernel_frame(paddr_t paddr, pptr_t vaddr, vm_rights_t vm_righ
     word_t uxn = 1; /* unprivileged execute never */
 #endif /* CONFIG_ARM_HYPERVISOR_SUPPORT */
     word_t attr_index;
-    word_t shareable;
+
+    /* ARM ARM RPYFVQ: device memory is treated as Outer Shareable, and the PTE attribute has no effect. */
+    word_t shareable = INNER_SHAREABLE;
     if (vm_attributes_get_armPageCacheable(attributes)) {
         attr_index = NORMAL;
-        shareable = SMP_TERNARY(SMP_SHARE, 0);
     } else {
         attr_index = DEVICE_nGnRnE;
-        shareable = 0;
     }
     armKSGlobalKernelPT[GET_KPT_INDEX(vaddr, KLVL_FRM_ARM_PT_LVL(3))] = pte_pte_4k_page_new(uxn, paddr,
                                                                                             0, /* global */
@@ -244,7 +252,7 @@ BOOT_CODE void map_kernel_window(void)
                                                                                                                         paddr,
                                                                                                                         0,                        /* global */
                                                                                                                         1,                        /* access flag */
-                                                                                                                        SMP_TERNARY(SMP_SHARE, 0),        /* Inner-shareable if SMP enabled, otherwise unshared */
+                                                                                                                        INNER_SHAREABLE,
                                                                                                                         0,                        /* VMKernelOnly */
                                                                                                                         NORMAL
                                                                                                                     );
@@ -303,7 +311,7 @@ static BOOT_CODE void map_it_frame_cap(cap_t vspace_cap, cap_t frame_cap, bool_t
                                                               1,                              /* not global */
 #endif
                                                               1,                              /* access flag */
-                                                              SMP_TERNARY(SMP_SHARE, 0),              /* Inner-shareable if SMP enabled, otherwise unshared */
+                                                              INNER_SHAREABLE,
                                                               APFromVMRights(VMReadWrite),
 #ifdef CONFIG_ARM_HYPERVISOR_SUPPORT
                                                               S2_NORMAL
@@ -665,15 +673,12 @@ static pte_t makeUserPagePTE(paddr_t paddr, vm_rights_t vm_rights, vm_attributes
     word_t attridx = cacheable ? NORMAL : DEVICE_nGnRnE;
 #endif
 
-    /* Inner-shareable if SMP enabled, otherwise unshared (ignored for devices) */
-    word_t shareable = cacheable ? SMP_TERNARY(SMP_SHARE, 0) : 0;
-
     if (page_size == ARMSmallPage) {
         return pte_pte_4k_page_new(nonexecutable, paddr, nG, 1 /* access flag */,
-                                   shareable, APFromVMRights(vm_rights), attridx);
+                                   INNER_SHAREABLE, APFromVMRights(vm_rights), attridx);
     } else {
         return pte_pte_page_new(nonexecutable, paddr, nG, 1 /* access flag */,
-                                shareable, APFromVMRights(vm_rights), attridx);
+                                INNER_SHAREABLE, APFromVMRights(vm_rights), attridx);
     }
 }
 
@@ -1927,7 +1932,7 @@ exception_t benchmark_arch_map_logBuffer(word_t frame_cptr)
                              ksUserLogBuffer,
                              0,                         /* global */
                              1,                         /* access flag */
-                             SMP_TERNARY(SMP_SHARE, 0), /* Inner-shareable if SMP enabled, otherwise unshared */
+                             INNER_SHAREABLE,
                              0,                         /* VMKernelOnly */
                              NORMAL_WT);
 

diff --git a/src/arch/arm/64/machine/capdl.c b/src/arch/arm/64/machine/capdl.c
@@ -21,7 +21,7 @@ word_t get_tcb_sp(tcb_t *tcb)
 
 static void obj_frame_print_attrs(vm_page_size_t frameSize, paddr_t frameBase);
 static void cap_frame_print_attrs_pt(pte_t *ptSlot);
-static void cap_frame_print_attrs_impl(word_t SH, word_t AP, word_t NXN);
+static void cap_frame_print_attrs_impl(word_t AttrIndx, word_t AP, word_t NXN);
 static void cap_frame_print_attrs_vptr(word_t vptr, cap_t vspace);
 
 static void _cap_frame_print_attrs_vptr(word_t vptr, vspace_root_t *vspaceRoot);
@@ -55,12 +55,12 @@ static void arm64_cap_pud_print_slots(void *pgdSlot_or_vspace, vptr_t vptr);
 /* use when only have access to pte of frames */
 static void cap_frame_print_attrs_pt(pte_t *ptSlot)
 {
-    cap_frame_print_attrs_impl(pte_pte_page_ptr_get_SH(ptSlot),
+    cap_frame_print_attrs_impl(pte_pte_page_ptr_get_AttrIndx(ptSlot), /* attribute index: tells uncached mappings apart */
                                pte_pte_page_ptr_get_AP(ptSlot),
                                pte_pte_page_ptr_get_UXN(ptSlot));
 }
 
-static void cap_frame_print_attrs_impl(word_t SH, word_t AP, word_t NXN)
+static void cap_frame_print_attrs_impl(word_t AttrIndx, word_t AP, word_t NXN)
 {
     printf("(");
 
@@ -96,8 +96,8 @@ static void cap_frame_print_attrs_impl(word_t SH, word_t AP, word_t NXN)
         printf("X");
     }
 
-    /* Only has effect if SMP enabled */
-    if (SH != SMP_TERNARY(SMP_SHARE, 0)) {
+    /* DEVICE_nGnRnE is the only attribute we use for uncached memory right now. */
+    if (AttrIndx == DEVICE_nGnRnE) {
         printf(", uncached");
     }
 

diff --git a/src/arch/arm/config.cmake b/src/arch/arm/config.cmake
@@ -32,6 +32,11 @@ elseif(KernelArmCortexA72)
     # (https://developer.arm.com/documentation/100095/0001/memory-management-unit/about-the-mmu)
     set(KernelArmPASizeBits44 ON)
     math(EXPR KernelPaddrUserTop "(1 << 44)")
+elseif(KernelArmCortexA78AE)
+    # The Cortex-A78AE supports 48-bit physical addresses,
+    # but only a 44-bit PA space is used here.
+    set(KernelArmPASizeBits44 ON)
+    math(EXPR KernelPaddrUserTop "(1 << 44)")
 endif()
 config_set(KernelArmPASizeBits40 ARM_PA_SIZE_BITS_40 "${KernelArmPASizeBits40}")
 config_set(KernelArmPASizeBits44 ARM_PA_SIZE_BITS_44 "${KernelArmPASizeBits44}")
@@ -86,7 +91,7 @@ config_option(
     "Build as Hypervisor. Utilise ARM virtualisation extensions to build the kernel as a hypervisor"
     DEFAULT ${KernelSel4ArchArmHyp}
     DEPENDS
-        "KernelArmCortexA15 OR KernelArmCortexA35 OR KernelArmCortexA57 OR KernelArmCortexA53 OR KernelArmCortexA55 OR KernelArmCortexA72"
+        "KernelArmCortexA15 OR KernelArmCortexA35 OR KernelArmCortexA57 OR KernelArmCortexA53 OR KernelArmCortexA55 OR KernelArmCortexA72 OR KernelArmCortexA78AE"
 )
 
 config_option(KernelArmGicV3 ARM_GIC_V3_SUPPORT "Build support for GICv3" DEFAULT OFF)
@@ -235,6 +240,7 @@ if(
     OR KernelArmCortexA55
     OR KernelArmCortexA57
     OR KernelArmCortexA72
+    OR KernelArmCortexA78AE
 )
     # According to https://developer.arm.com/documentation/100095/0001/functional-description/about-the-cortex-a72-processor-functions/components-of-the-processor
     # the L1 instruction on the Cortex-A72 cache has a 64-byte cache line.

diff --git a/src/arch/arm/machine/gic_v3.c b/src/arch/arm/machine/gic_v3.c
@@ -14,8 +14,10 @@
 #define RDIST_BANK_SZ 0x00010000
 /* One GICR region and one GICR_SGI region */
 #define GICR_PER_CORE_SIZE  (0x20000)
-/* Assume 8 cores */
-#define GICR_SIZE           (0x100000)
+/* Assume 12 cores (12 * GICR_PER_CORE_SIZE).
+ * NOTE: this must match the size of the GICR region hardcoded in hardware.yml.
+ */
+#define GICR_SIZE           (0x180000)
 
 #define GIC_DEADLINE_MS 2
 #define GIC_REG_WIDTH   32
@@ -26,11 +28,6 @@
 #define ICC_SGI1R_EL1 "p15, 0, %Q0, %R0, c12"
 #endif
 
-#define ICC_SGI1R_INTID_SHIFT          (24)
-#define ICC_SGI1R_AFF1_SHIFT           (16)
-#define ICC_SGI1R_IRM_BIT              (40)
-#define ICC_SGI1R_CPUTARGETLIST_MASK   0xffff
-
 volatile struct gic_dist_map *const gic_dist = (volatile struct gic_dist_map *)(GICD_PPTR);
 volatile void *const gicr_base = (volatile uint8_t *)(GICR_PPTR);
 
@@ -343,34 +340,19 @@ BOOT_CODE void cpu_initLocalIRQController(void)
 }
 
 #ifdef ENABLE_SMP_SUPPORT
-#define MPIDR_MT(x)   (x & BIT(24))
-
-void ipi_send_target(irq_t irq, word_t cpuTargetList)
+/* Send SGI 'irq' to the single node 'cpuID' (an index into mpidr_map), addressed by its MPIDR affinity fields. */
+void ipi_send_target(irq_t irq, word_t cpuID)
 {
-    uint64_t sgi1r_base = ((word_t) IRQT_TO_IRQ(irq)) << ICC_SGI1R_INTID_SHIFT;
-    word_t sgi1r[CONFIG_MAX_NUM_NODES];
-    word_t last_aff1 = 0;
-
-    for (word_t i = 0; i < CONFIG_MAX_NUM_NODES; i++) {
-        sgi1r[i] = 0;
-        if (cpuTargetList & BIT(i)) {
-            word_t mpidr = mpidr_map[i];
-            word_t aff1 = MPIDR_AFF1(mpidr);
-            word_t aff0 = MPIDR_AFF0(mpidr);
-            // AFF1 is assumed to be contiguous and less than CONFIG_MAX_NUM_NODES.
-            // The targets are grouped by AFF1.
-            assert(aff1 >= 0 && aff1 < CONFIG_MAX_NUM_NODES);
-            sgi1r[aff1] |= sgi1r_base | (aff1 << ICC_SGI1R_AFF1_SHIFT) | (1 << aff0);
-            if (aff1 > last_aff1) {
-                last_aff1 = aff1;
-            }
-        }
-    }
-    for (word_t i = 0; i <= last_aff1; i++) {
-        if (sgi1r[i] != 0) {
-            SYSTEM_WRITE_64(ICC_SGI1R_EL1, sgi1r[i]);
-        }
-    }
+    assert(cpuID < CONFIG_MAX_NUM_NODES);
+    uint64_t sgi1r_base = ((uint64_t) IRQT_TO_IRQ(irq)) << ICC_SGI1R_INT_ID_SHIFT; /* SGI1R is 64 bits wide */
+    word_t mpidr = mpidr_map[cpuID];
+    uint64_t aff0 = MPIDR_AFF0(mpidr); /* 64-bit so the shifts by 32 and 48 below are defined when word_t is 32-bit */
+    uint64_t aff1 = MPIDR_AFF1(mpidr);
+    uint64_t aff2 = MPIDR_AFF2(mpidr);
+    uint64_t aff3 = MPIDR_AFF3(mpidr);
+    uint64_t sgi = sgi1r_base | (aff3 << ICC_SGI1R_AFF3_SHIFT) | (aff2 << ICC_SGI1R_AFF2_SHIFT);
+    sgi |= (aff1 << ICC_SGI1R_AFF1_SHIFT) | ((uint64_t)1 << aff0); /* NOTE: RS is left 0, so this assumes aff0 < 16 */
+    SYSTEM_WRITE_64(ICC_SGI1R_EL1, sgi);
     isb();
 }
 

diff --git a/src/drivers/serial/config.cmake b/src/drivers/serial/config.cmake
@@ -44,3 +44,8 @@ register_driver(
     PREFIX src/drivers/serial
     CFILES "meson-gx-uart.c"
 )
+register_driver(
+    compatibility_strings "nvidia,tegra194-tcu"
+    PREFIX src/drivers/serial
+    CFILES "tegra194-tcu.c"
+)
diff --git a/src/drivers/serial/tegra194-tcu.c b/src/drivers/serial/tegra194-tcu.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2023, NIO
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#include <config.h>
+#include <stdint.h>
+#include <util.h>
+#include <machine/io.h>
+#include <plat/machine/devices_gen.h>
+
+#define TCU_TX_REG              (0)  /* offset of the TX register from the device base */
+#define TX_NUM_BYTES_FIELD_BIT  (24) /* bit written as 1 when sending a single byte */
+#define TX_FLUSH_BIT            (26) /* set when the character ends a line */
+#define TX_INTR_TRIGGER_BIT     (31) /* set on every write; polled until clear before the next one */
+#define UART_REG(mmio, x)       ((volatile uint32_t *)((mmio) + (x)))
+
+#ifdef CONFIG_PRINTING
+/* Write character 'c' to the TCU TX register once the trigger bit there reads as clear. */
+void uart_drv_putchar(unsigned char c)
+{
+    volatile uint32_t *tx = UART_REG(UART_PPTR, TCU_TX_REG);
+    /* We are writing one byte: byte count of 1, the trigger bit, and the byte itself. */
+    uint32_t word = (uint32_t)(1UL << TX_NUM_BYTES_FIELD_BIT) | BIT(TX_INTR_TRIGGER_BIT) | c;
+    if (c == '\n' || c == '\r') {
+        /* End of line: additionally set the flush bit. */
+        word |= BIT(TX_FLUSH_BIT);
+    }
+
+    /* Spin for as long as the trigger bit in the TX register still reads as set. */
+    while (*tx & BIT(TX_INTR_TRIGGER_BIT)) {
+        /* busy-wait */
+    }
+    *tx = word;
+}
+#endif /* CONFIG_PRINTING */
+
+#ifdef CONFIG_DEBUG_BUILD
+unsigned char uart_drv_getchar(void)
+{
+    return 0; /* stub: no device register is read here, the result is always 0 */
+}
+#endif /* CONFIG_DEBUG_BUILD */