Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NVIDIA Jetson Orin support #1135

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Upcoming release: BREAKING
* Added support for arm_hyp on qemu-arm-virt platform with cortex-a15 CPU
* Added support for qemu-riscv-virt
* Added support for the Pine64 Star64
* Added support for the NVIDIA Jetson Orin
* Rename libsel4 config option ENABLE_SMP_SUPPORT to CONFIG_ENABLE_SMP_SUPPORT to be namespace compliant.
* Rename libsel4 config option AARCH64_VSPACE_S2_START_L1 to CONFIG_AARCH64_VSPACE_S2_START_L1 to be namespace
compliant.
Expand Down
4 changes: 4 additions & 0 deletions configs/seL4Config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ foreach(
KernelArmCortexA55
KernelArmCortexA57
KernelArmCortexA72
KernelArmCortexA78AE
KernelArchArmV7a
KernelArchArmV7ve
KernelArchArmV8a
Expand Down Expand Up @@ -194,6 +195,7 @@ config_set(KernelArmCortexA53 ARM_CORTEX_A53 "${KernelArmCortexA53}")
config_set(KernelArmCortexA55 ARM_CORTEX_A55 "${KernelArmCortexA55}")
config_set(KernelArmCortexA57 ARM_CORTEX_A57 "${KernelArmCortexA57}")
config_set(KernelArmCortexA72 ARM_CORTEX_A72 "${KernelArmCortexA72}")
config_set(KernelArmCortexA78AE ARM_CORTEX_A78AE "${KernelArmCortexA78AE}")
config_set(KernelArchArmV7a ARCH_ARM_V7A "${KernelArchArmV7a}")
config_set(KernelArchArmV7ve ARCH_ARM_V7VE "${KernelArchArmV7ve}")
config_set(KernelArchArmV8a ARCH_ARM_V8A "${KernelArchArmV8a}")
Expand Down Expand Up @@ -227,6 +229,8 @@ elseif(KernelArmCortexA57)
set(KernelArmCPU "cortex-a57" CACHE INTERNAL "")
elseif(KernelArmCortexA72)
set(KernelArmCPU "cortex-a72" CACHE INTERNAL "")
elseif(KernelArmCortexA78AE)
set(KernelArmCPU "cortex-a78ae" CACHE INTERNAL "")
endif()
if(KernelArchARM)
config_set(KernelArmMach ARM_MACH "${KernelArmMach}")
Expand Down
23 changes: 23 additions & 0 deletions include/arch/arm/arch/machine/gic_v3.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,29 @@

#define DEFAULT_PMR_VALUE 0xff

/*
 * ICC_SGI1R (SGI generation register) field layout.
 *
 * For every field:
 *   <FIELD>_SHIFT       bit offset of the field within the register,
 *   <FIELD>_MASK        mask selecting the field in place,
 *   <FIELD>_VAL(sgi1r)  field value extracted from a register value.
 *
 * All masks are 64-bit (ull) constants so that every mask has the same type
 * and can be combined with a full 64-bit register value.
 */

/* Bits [15:0]: target list */
#define ICC_SGI1R_TARGET_LIST_SHIFT 0
#define ICC_SGI1R_TARGET_LIST_MASK (0xffffull << ICC_SGI1R_TARGET_LIST_SHIFT)
#define ICC_SGI1R_TARGET_LIST_VAL(sgi1r) (((sgi1r) & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT)

/* Bits [23:16]: affinity level 1 */
#define ICC_SGI1R_AFF1_SHIFT 16
#define ICC_SGI1R_AFF1_MASK (0xffull << ICC_SGI1R_AFF1_SHIFT)
#define ICC_SGI1R_AFF1_VAL(sgi1r) (((sgi1r) & ICC_SGI1R_AFF1_MASK) >> ICC_SGI1R_AFF1_SHIFT)

/* Bits [27:24]: interrupt ID of the SGI */
#define ICC_SGI1R_INT_ID_SHIFT 24
#define ICC_SGI1R_INT_ID_MASK (0xfull << ICC_SGI1R_INT_ID_SHIFT)
#define ICC_SGI1R_INT_ID_VAL(sgi1r) (((sgi1r) & ICC_SGI1R_INT_ID_MASK) >> ICC_SGI1R_INT_ID_SHIFT)

/* Bits [39:32]: affinity level 2 */
#define ICC_SGI1R_AFF2_SHIFT 32
#define ICC_SGI1R_AFF2_MASK (0xffull << ICC_SGI1R_AFF2_SHIFT)
#define ICC_SGI1R_AFF2_VAL(sgi1r) (((sgi1r) & ICC_SGI1R_AFF2_MASK) >> ICC_SGI1R_AFF2_SHIFT)

/* Bit [40]: IRM */
#define ICC_SGI1R_IRM_SHIFT 40
#define ICC_SGI1R_IRM_MASK (0x1ull << ICC_SGI1R_IRM_SHIFT)
#define ICC_SGI1R_IRM_VAL(sgi1r) (((sgi1r) & ICC_SGI1R_IRM_MASK) >> ICC_SGI1R_IRM_SHIFT)

/* Bits [47:44]: RS */
#define ICC_SGI1R_RS_SHIFT 44
#define ICC_SGI1R_RS_MASK (0xfull << ICC_SGI1R_RS_SHIFT)
#define ICC_SGI1R_RS_VAL(sgi1r) (((sgi1r) & ICC_SGI1R_RS_MASK) >> ICC_SGI1R_RS_SHIFT)

/* Bits [55:48]: affinity level 3 */
#define ICC_SGI1R_AFF3_SHIFT 48
#define ICC_SGI1R_AFF3_MASK (0xffull << ICC_SGI1R_AFF3_SHIFT)
#define ICC_SGI1R_AFF3_VAL(sgi1r) (((sgi1r) & ICC_SGI1R_AFF3_MASK) >> ICC_SGI1R_AFF3_SHIFT)

/* System registers for GIC CPU interface */
#ifdef CONFIG_ARCH_AARCH64
#define ICC_IAR1_EL1 "S3_0_C12_C12_0"
Expand Down
4 changes: 4 additions & 0 deletions include/arch/arm/arch/model/smp.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@
#ifdef ENABLE_SMP_SUPPORT
/*
 * Translate a logical core index into a cpu_id_t.
 *
 * Without GICv3 support the identifier is a one-hot bit mask with bit
 * `index` set; with GICv3 support it is the core index itself.
 */
static inline cpu_id_t cpuIndexToID(word_t index)
{
#ifndef CONFIG_ARM_GIC_V3_SUPPORT
    return BIT(index);
#else
    return index;
#endif
}
Indanz marked this conversation as resolved.
Show resolved Hide resolved

static inline bool_t try_arch_atomic_exchange_rlx(void *ptr, void *new_val, void **prev)
Expand Down
28 changes: 28 additions & 0 deletions libsel4/arch_include/arm/sel4/arch/constants_cortex_a78ae.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright 2023, NIO
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#pragma once

#include <sel4/config.h>

/* This header only describes the Cortex-A78AE; fail the build for any other core. */
#if !defined(CONFIG_ARM_CORTEX_A78AE)
#error CONFIG_ARM_CORTEX_A78AE is not defined
#endif

/*
 * Hardware debug resources (Cortex-A78AE TRM, Section 5.3).
 * The total below is the sum of the exclusive breakpoints (6) and the
 * exclusive watchpoints (4).
 */
#define seL4_NumHWBreakpoints 10
#define seL4_NumExclusiveBreakpoints 6
#define seL4_NumExclusiveWatchpoints 4

/* The index constants below are only provided when the hardware debug API is enabled. */
#ifdef CONFIG_HARDWARE_DEBUG_API

/* Breakpoints start at index 0; watchpoints start right after the 6 breakpoints. */
#define seL4_FirstBreakpoint 0
#define seL4_FirstWatchpoint 6

/* No dual-function monitors are declared, so the first index is set to -1. */
#define seL4_NumDualFunctionMonitors 0
#define seL4_FirstDualFunctionMonitor (-1)

#endif /* CONFIG_HARDWARE_DEBUG_API */
14 changes: 14 additions & 0 deletions libsel4/sel4_plat_include/orin/sel4/plat/api/constants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/*
* Copyright 2023, NIO
* SPDX-License-Identifier: BSD-2-Clause
*/

#pragma once

#include <sel4/config.h>

/*
 * Pull in the per-core constants for this platform. The Cortex-A78AE is the
 * only core handled here; any other core configuration is a build error.
 */
#if defined(CONFIG_ARM_CORTEX_A78AE)
#include <sel4/arch/constants_cortex_a78ae.h>
#else
#error "unsupported core"
#endif
37 changes: 21 additions & 16 deletions src/arch/arm/64/kernel/vspace.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,20 @@ enum mair_s2_types {
S2_NORMAL = S2_NORMAL_INNER_WBC_OUTER_WBC
};

/* Leif from Linaro said the big.LITTLE clusters should be treated as
* inner shareable, and we believe so, although the Example B2-1 given in
* ARM ARM DDI 0487B.b (ID092517) says otherwise.
/* ARM DDI 0487J.a, section D8.5.2 */
/* Linux will only run on cores within the same Inner Shareable domain.
* They make an assumption that all the PEs are under a single IS domain,
* and we do the same here.
*
* Arm also states "Arm expects operating systems to mark the
* majority of DRAM memory as Normal Write-back cacheable, Inner
* shareable" (102376_0200_01_en version 2).
*
* Note: according to RPYFVQ (ARM ARM DDI 0487J.a), the shareability
* attribute for Device memory does not matter as it will instead
* be treated as Outer Shareable.
*/

#define SMP_SHARE 3
#define INNER_SHAREABLE 3

struct lookupPTSlot_ret {
pte_t *ptSlot;
Expand Down Expand Up @@ -184,13 +192,13 @@ BOOT_CODE void map_kernel_frame(paddr_t paddr, pptr_t vaddr, vm_rights_t vm_righ
word_t uxn = 1; /* unprivileged execute never */
#endif /* CONFIG_ARM_HYPERVISOR_SUPPORT */
word_t attr_index;
word_t shareable;

/* ARM ARM RPYFVQ: device memory is treated as Outer Shareable, and the PTE attribute has no effect. */
word_t shareable = INNER_SHAREABLE;
if (vm_attributes_get_armPageCacheable(attributes)) {
attr_index = NORMAL;
shareable = SMP_TERNARY(SMP_SHARE, 0);
} else {
attr_index = DEVICE_nGnRnE;
shareable = 0;
}
armKSGlobalKernelPT[GET_KPT_INDEX(vaddr, KLVL_FRM_ARM_PT_LVL(3))] = pte_pte_4k_page_new(uxn, paddr,
0, /* global */
Expand Down Expand Up @@ -244,7 +252,7 @@ BOOT_CODE void map_kernel_window(void)
paddr,
0, /* global */
1, /* access flag */
SMP_TERNARY(SMP_SHARE, 0), /* Inner-shareable if SMP enabled, otherwise unshared */
INNER_SHAREABLE,
0, /* VMKernelOnly */
NORMAL
);
Expand Down Expand Up @@ -303,7 +311,7 @@ static BOOT_CODE void map_it_frame_cap(cap_t vspace_cap, cap_t frame_cap, bool_t
1, /* not global */
#endif
1, /* access flag */
SMP_TERNARY(SMP_SHARE, 0), /* Inner-shareable if SMP enabled, otherwise unshared */
INNER_SHAREABLE,
APFromVMRights(VMReadWrite),
#ifdef CONFIG_ARM_HYPERVISOR_SUPPORT
S2_NORMAL
Expand Down Expand Up @@ -665,15 +673,12 @@ static pte_t makeUserPagePTE(paddr_t paddr, vm_rights_t vm_rights, vm_attributes
word_t attridx = cacheable ? NORMAL : DEVICE_nGnRnE;
#endif

/* Inner-shareable if SMP enabled, otherwise unshared (ignored for devices) */
word_t shareable = cacheable ? SMP_TERNARY(SMP_SHARE, 0) : 0;

if (page_size == ARMSmallPage) {
return pte_pte_4k_page_new(nonexecutable, paddr, nG, 1 /* access flag */,
shareable, APFromVMRights(vm_rights), attridx);
INNER_SHAREABLE, APFromVMRights(vm_rights), attridx);
} else {
return pte_pte_page_new(nonexecutable, paddr, nG, 1 /* access flag */,
shareable, APFromVMRights(vm_rights), attridx);
INNER_SHAREABLE, APFromVMRights(vm_rights), attridx);
}
Comment on lines -668 to 682
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the part that breaks verification currently. It would be relatively easy to update, but I'd like to understand better what we are really doing here. Why are we usually not setting inner shareable, and could setting it unconditionally have any adverse effect on older platforms?

Is there any potential interaction with device memory?

The explanation in the commit message was useful. I think it would be good to have more of it as a comment in the code (maybe where INNER_SHAREABLE is defined).

Copy link
Contributor Author

@andybui01 andybui01 Feb 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any potential interaction with device memory?

None, ARM states in RPYFVQ (DDI 0487J.a) that the value doesn't matter for device memory, which it will just treat as Outer Shareable.

Why are we usually not setting inner shareable, and could setting it unconditionally have any adverse effect on older platforms?

I think the current reasoning is that it's not really needed for UP systems. Having a more permissive shareability such as inner shareable might mean wasted cycles trying to be coherent with the other PEs when it's not really needed. However, I believe that even while running seL4 in UP mode, another agent in the system may still need to be coherent with the PE/node. I have not had a deeper look as to which part of the Orin SoC might be requiring this.

I'll try and copy the explanations into the code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Example: We've had some existing problems with the Orin's System Cache. It is not an architectural cache and is shared by the CPU/GPU, making it effectively a CPU L4 cache. It may be the case that this is not within the non-shareable domain.

I use this example because we added a cache line invalidate by VA on a range of memory, which seemed to half-fix some of the instabilities we were encountering. Setting the shareability to IS fully fixed this issue and made the fix redundant.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, that makes sense. It would be good to see if there is any measurable performance impact then, even if we're not expecting any. I guess we could just kick off a sel4bench run and see what happens on the existing platforms.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, I believe shareability and cacheability attribute values for user frame mapping is policy that needs to be delegated to user level.

I think the current reasoning is that it's not really needed for UP systems. Having a more permissive shareability such as inner shareable might mean wasted cycles trying to be coherent with the other PEs when it's not really needed.

It can also mean that the system has to fall back to uncached memory if it doesn't provide hardware cache-coherency. So increasing the shareability from none, to inner to outer can degrade performance if it's not needed.

Example: We've had some existing problems with the Orin's System Cache. It is not an architectural cache and is shared by the CPU/GPU, making it effectively a CPU L4 cache. It may be the case that this is not within the non-shareable domain.

Wouldn't this be an issue with inner vs outer cacheability attributes rather than shareability?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, I believe that even while running seL4 in UP mode, another agent in the system may still need to be coherent with the PE/node.

Being cache coherent with other cores should be the default behaviour for seL4, to avoid very hard to debug surprises. So I am strongly in favour of this change.

It can also mean that the system has to fall back to uncached memory if it doesn't provide hardware cache-coherency. So increasing the shareability from none, to inner to outer can degrade performance if it's not needed.

Those pages would be marked cacheable and this change doesn't change anything for them on SMP.

These SoCs are made for coherency between cores, I don't think downgrading non-SMP performance to the same level as with SMP for some obscure corner case is a problem.

And the proper fix for that would be to extend the API and provide more fine grained cacheability and shareability attribute values to user space, like you propose.

Wouldn't this be an issue with inner vs outer cacheability attributes rather than shareability?

It does sound like that, so I'm also a bit surprised this change fixed that. Maybe the GPU is in the same cluster?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this commit be split out into a separate PR and discussed separately? Its scope is more than the Jetson Orin support.

}

Expand Down Expand Up @@ -1927,7 +1932,7 @@ exception_t benchmark_arch_map_logBuffer(word_t frame_cptr)
ksUserLogBuffer,
0, /* global */
1, /* access flag */
SMP_TERNARY(SMP_SHARE, 0), /* Inner-shareable if SMP enabled, otherwise unshared */
INNER_SHAREABLE,
0, /* VMKernelOnly */
NORMAL_WT);

Expand Down
10 changes: 5 additions & 5 deletions src/arch/arm/64/machine/capdl.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ word_t get_tcb_sp(tcb_t *tcb)

static void obj_frame_print_attrs(vm_page_size_t frameSize, paddr_t frameBase);
static void cap_frame_print_attrs_pt(pte_t *ptSlot);
static void cap_frame_print_attrs_impl(word_t SH, word_t AP, word_t NXN);
static void cap_frame_print_attrs_impl(word_t AttrIndx, word_t AP, word_t NXN);
static void cap_frame_print_attrs_vptr(word_t vptr, cap_t vspace);

static void _cap_frame_print_attrs_vptr(word_t vptr, vspace_root_t *vspaceRoot);
Expand Down Expand Up @@ -55,12 +55,12 @@ static void arm64_cap_pud_print_slots(void *pgdSlot_or_vspace, vptr_t vptr);
/* use when only have access to pte of frames */
static void cap_frame_print_attrs_pt(pte_t *ptSlot)
{
cap_frame_print_attrs_impl(pte_pte_page_ptr_get_SH(ptSlot),
cap_frame_print_attrs_impl(pte_pte_page_ptr_get_AttrIndx(ptSlot),
pte_pte_page_ptr_get_AP(ptSlot),
pte_pte_page_ptr_get_UXN(ptSlot));
}

static void cap_frame_print_attrs_impl(word_t SH, word_t AP, word_t NXN)
static void cap_frame_print_attrs_impl(word_t AttrIndx, word_t AP, word_t NXN)
{
printf("(");

Expand Down Expand Up @@ -96,8 +96,8 @@ static void cap_frame_print_attrs_impl(word_t SH, word_t AP, word_t NXN)
printf("X");
}

/* Only has effect if SMP enabled */
if (SH != SMP_TERNARY(SMP_SHARE, 0)) {
/* DEVICE_nGnRnE is the only attribute we use for uncached memory right now. */
if (AttrIndx == DEVICE_nGnRnE) {
printf(", uncached");
}

Expand Down
8 changes: 7 additions & 1 deletion src/arch/arm/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ elseif(KernelArmCortexA72)
# (https://developer.arm.com/documentation/100095/0001/memory-management-unit/about-the-mmu)
set(KernelArmPASizeBits44 ON)
math(EXPR KernelPaddrUserTop "(1 << 44)")
elseif(KernelArmCortexA78AE)
# The Cortex-A78AE supports 48-bit physical addresses, but the kernel does not
# currently support the full 48 bits, so a 44-bit PA size is used.
Copy link
Member

@axel-h axel-h Nov 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems Orin specific then? So the comment should clearly say this, or we could even have a platform-specific setting then for Orin besides the A78AE's default

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that should be clearer: while the A78AE supports 48 bits, I haven't figured out a way to enable this without causing a chain reaction of assertion failures within the kernel.

My understanding is that the ARM64 kernel only has 47 useable bits of physical memory unless hypervisor mode is enabled, if anyone knows the context to this please share :).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Considering the Jetson Orin modules don't support more than 64GB, let alone 1TB, why not treat it as a Cortex-A78?

Or are you also adding lockstep support to seL4 later on?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is some discussion on 48 bit physical address space in #1175 and #1157, but the result is basically that the full 48 bit are currently not supported and would require further changes.

set(KernelArmPASizeBits44 ON)
math(EXPR KernelPaddrUserTop "(1 << 44)")
endif()
config_set(KernelArmPASizeBits40 ARM_PA_SIZE_BITS_40 "${KernelArmPASizeBits40}")
config_set(KernelArmPASizeBits44 ARM_PA_SIZE_BITS_44 "${KernelArmPASizeBits44}")
Expand Down Expand Up @@ -86,7 +91,7 @@ config_option(
"Build as Hypervisor. Utilise ARM virtualisation extensions to build the kernel as a hypervisor"
DEFAULT ${KernelSel4ArchArmHyp}
DEPENDS
"KernelArmCortexA15 OR KernelArmCortexA35 OR KernelArmCortexA57 OR KernelArmCortexA53 OR KernelArmCortexA55 OR KernelArmCortexA72"
"KernelArmCortexA15 OR KernelArmCortexA35 OR KernelArmCortexA57 OR KernelArmCortexA53 OR KernelArmCortexA55 OR KernelArmCortexA72 OR KernelArmCortexA78AE"
)

config_option(KernelArmGicV3 ARM_GIC_V3_SUPPORT "Build support for GICv3" DEFAULT OFF)
Expand Down Expand Up @@ -235,6 +240,7 @@ if(
OR KernelArmCortexA55
OR KernelArmCortexA57
OR KernelArmCortexA72
OR KernelArmCortexA78AE
)
# According to https://developer.arm.com/documentation/100095/0001/functional-description/about-the-cortex-a72-processor-functions/components-of-the-processor
# the L1 instruction cache on the Cortex-A72 has a 64-byte cache line.
Expand Down
50 changes: 16 additions & 34 deletions src/arch/arm/machine/gic_v3.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
#define RDIST_BANK_SZ 0x00010000
/* One GICR region and one GICR_SGI region */
#define GICR_PER_CORE_SIZE (0x20000)
/* Assume 8 cores */
#define GICR_SIZE (0x100000)
/* Assume 12 cores
* NOTE: this is hardcoded to the same value for the GICR region in hardware.yml
*/
#define GICR_SIZE (0x180000)
Indanz marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps a good idea to add a compile_assert(CONFIG_MAX_NUM_NODES * GICR_PER_CORE_SIZE <= GICR_SIZE); to reduce future frustration.


#define GIC_DEADLINE_MS 2
#define GIC_REG_WIDTH 32
Expand All @@ -26,11 +28,6 @@
#define ICC_SGI1R_EL1 "p15, 0, %Q0, %R0, c12"
#endif

#define ICC_SGI1R_INTID_SHIFT (24)
#define ICC_SGI1R_AFF1_SHIFT (16)
#define ICC_SGI1R_IRM_BIT (40)
#define ICC_SGI1R_CPUTARGETLIST_MASK 0xffff

volatile struct gic_dist_map *const gic_dist = (volatile struct gic_dist_map *)(GICD_PPTR);
volatile void *const gicr_base = (volatile uint8_t *)(GICR_PPTR);

Expand Down Expand Up @@ -343,34 +340,19 @@ BOOT_CODE void cpu_initLocalIRQController(void)
}

#ifdef ENABLE_SMP_SUPPORT
#define MPIDR_MT(x) (x & BIT(24))

/*
 * Send the inter-processor interrupt `irq` to a single core.
 *
 * irq:   the SGI to raise; its interrupt number goes into the INT_ID field.
 * cpuID: index of the target core (not a bit mask); must be below
 *        CONFIG_MAX_NUM_NODES. It is used to look up the core's MPIDR in
 *        mpidr_map.
 *
 * The register value is assembled in a uint64_t rather than a word_t: the
 * AFF2 and AFF3 fields sit at bit 32 and above, so shifting them inside a
 * 32-bit word_t would overflow and the written value would be truncated.
 */
void ipi_send_target(irq_t irq, word_t cpuID)
{
    assert(cpuID < CONFIG_MAX_NUM_NODES);

    word_t mpidr = mpidr_map[cpuID];
    word_t aff0 = MPIDR_AFF0(mpidr);

    /*
     * The target list is a 16-bit bitmap indexed by AFF0 and the RS field is
     * left at zero here, so only AFF0 values 0..15 can be addressed.
     */
    assert(aff0 < 16);

    uint64_t sgi1r = ((uint64_t) IRQT_TO_IRQ(irq)) << ICC_SGI1R_INT_ID_SHIFT;
    sgi1r |= ((uint64_t) MPIDR_AFF3(mpidr)) << ICC_SGI1R_AFF3_SHIFT;
    sgi1r |= ((uint64_t) MPIDR_AFF2(mpidr)) << ICC_SGI1R_AFF2_SHIFT;
    sgi1r |= ((uint64_t) MPIDR_AFF1(mpidr)) << ICC_SGI1R_AFF1_SHIFT;
    sgi1r |= ((uint64_t) 1) << aff0;

    SYSTEM_WRITE_64(ICC_SGI1R_EL1, sgi1r);
    /* Barrier kept from the original code, directly after the register write. */
    isb();
}

Expand Down
5 changes: 5 additions & 0 deletions src/drivers/serial/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,8 @@ register_driver(
PREFIX src/drivers/serial
CFILES "meson-gx-uart.c"
)
# Registers tegra194-tcu.c (under src/drivers/serial) as the serial driver for
# the "nvidia,tegra194-tcu" compatibility string.
register_driver(
compatibility_strings "nvidia,tegra194-tcu"
PREFIX src/drivers/serial
CFILES "tegra194-tcu.c"
)
44 changes: 44 additions & 0 deletions src/drivers/serial/tegra194-tcu.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2023, NIO
*
* SPDX-License-Identifier: GPL-2.0-only
*/

#include <config.h>
#include <stdint.h>
#include <util.h>
#include <machine/io.h>
#include <plat/machine/devices_gen.h>

/* Offset of the TX register from the UART base address. */
#define TCU_TX_REG (0)
/* Bit position of the "number of bytes" field in the TX register. */
#define TX_NUM_BYTES_FIELD_BIT (24)
/* Set to request a flush; the driver sets it on '\r' and '\n'. */
#define TX_FLUSH_BIT (26)
/* Set on every write; the driver polls it until clear before writing again. */
#define TX_INTR_TRIGGER_BIT (31)

/*
 * Pointer to the 32-bit register at byte offset `x` from base `mmio`.
 * Both arguments are parenthesised so that any expression can be passed as
 * the base without operator-precedence surprises.
 */
#define UART_REG(mmio, x) ((volatile uint32_t *)((mmio) + (x)))

#ifdef CONFIG_PRINTING
/*
 * Write one character to the TX register.
 *
 * The value written carries the character in its low bits, a byte count of
 * one, and the trigger bit. The flush bit is added for '\r' and '\n'.
 * Before writing, the function spins until the trigger bit in the register
 * reads back as clear.
 */
void uart_drv_putchar(unsigned char c)
{
    volatile uint32_t *tx = UART_REG(UART_PPTR, TCU_TX_REG);

    /* One payload byte, trigger bit set, character in the low bits. */
    uint32_t word = (uint32_t)(1UL << TX_NUM_BYTES_FIELD_BIT);
    word |= BIT(TX_INTR_TRIGGER_BIT);
    word |= c;

    /* End-of-line characters also request a flush. */
    if (c == '\n' || c == '\r') {
        word |= BIT(TX_FLUSH_BIT);
    }

    /* Busy-wait while the trigger bit is still set in the register. */
    while (*tx & BIT(TX_INTR_TRIGGER_BIT)) {
    }

    *tx = word;
}
#endif /* CONFIG_PRINTING */

#ifdef CONFIG_DEBUG_BUILD
/*
 * Stub: this driver does not read any input and always returns 0.
 */
unsigned char uart_drv_getchar(void)
{
return 0;
}
#endif /* CONFIG_DEBUG_BUILD */