Skip to content

Commit 0837a10

Browse files
authored
Merge 06afb0f into 28eb75e
2 parents 28eb75e + 06afb0f commit 0837a10

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+1416
-560
lines changed

Documentation/core-api/printk-formats.rst

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,12 +209,17 @@ Struct Resources
209209
::
210210

211211
%pr [mem 0x60000000-0x6fffffff flags 0x2200] or
212+
[mem 0x60000000 flags 0x2200] or
212213
[mem 0x0000000060000000-0x000000006fffffff flags 0x2200]
214+
[mem 0x0000000060000000 flags 0x2200]
213215
%pR [mem 0x60000000-0x6fffffff pref] or
216+
[mem 0x60000000 pref] or
214217
[mem 0x0000000060000000-0x000000006fffffff pref]
218+
[mem 0x0000000060000000 pref]
215219

216220
For printing struct resources. The ``R`` and ``r`` specifiers result in a
217-
printed resource with (R) or without (r) a decoded flags member.
221+
printed resource with (R) or without (r) a decoded flags member. If start is
222+
equal to end, only the start value is printed.
218223

219224
Passed by reference.
220225

@@ -231,6 +236,19 @@ width of the CPU data path.
231236

232237
Passed by reference.
233238

239+
Struct Range
240+
------------
241+
242+
::
243+
244+
%pra [range 0x0000000060000000-0x000000006fffffff] or
245+
[range 0x0000000060000000]
246+
247+
For printing struct range. struct range holds an arbitrary range of u64
248+
values. If start is equal to end, only the start value is printed.
249+
250+
Passed by reference.
251+
234252
DMA address types dma_addr_t
235253
----------------------------
236254

Documentation/tools/rtla/common_timerlat_options.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@
3131
*cyclictest* sets this value to *0* by default, use **--dma-latency** *0* to have
3232
similar results.
3333

34+
**--deepest-idle-state** *n*
35+
Disable idle states higher than *n* for cpus that are running timerlat threads to
36+
reduce exit-from-idle latencies. If *n* is -1, all idle states are disabled.
37+
On exit from timerlat, the idle state setting is restored to its original state
38+
before running timerlat.
39+
40+
Requires rtla to be built with libcpupower.
41+
3442
**-k**, **--kernel-threads**
3543

3644
Use timerlat kernel-space threads, in contrast to **-u**.

Documentation/trace/ftrace.rst

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,9 +1031,6 @@ explains which is which.
10311031
CPU#: The CPU which the process was running on.
10321032

10331033
irqs-off: 'd' interrupts are disabled. '.' otherwise.
1034-
.. caution:: If the architecture does not support a way to
1035-
read the irq flags variable, an 'X' will always
1036-
be printed here.
10371034

10381035
need-resched:
10391036
- 'N' both TIF_NEED_RESCHED and PREEMPT_NEED_RESCHED are set,

arch/x86/Kconfig

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2564,15 +2564,14 @@ config MITIGATION_CALL_DEPTH_TRACKING
25642564
default y
25652565
help
25662566
Compile the kernel with call depth tracking to mitigate the Intel
2567-
SKL Return-Speculation-Buffer (RSB) underflow issue. The
2568-
mitigation is off by default and needs to be enabled on the
2569-
kernel command line via the retbleed=stuff option. For
2570-
non-affected systems the overhead of this option is marginal as
2571-
the call depth tracking is using run-time generated call thunks
2572-
in a compiler generated padding area and call patching. This
2573-
increases text size by ~5%. For non affected systems this space
2574-
is unused. On affected SKL systems this results in a significant
2575-
performance gain over the IBRS mitigation.
2567+
SKL Return-Stack-Buffer (RSB) underflow issue. The mitigation is off
2568+
by default and needs to be enabled on the kernel command line via the
2569+
retbleed=stuff option. For non-affected systems the overhead of this
2570+
option is marginal as the call depth tracking is using run-time
2571+
generated call thunks in a compiler generated padding area and call
2572+
patching. This increases text size by ~5%. For non-affected systems
2573+
this space is unused. On affected SKL systems this results in a
2574+
significant performance gain over the IBRS mitigation.
25762575

25772576
config CALL_THUNKS_DEBUG
25782577
bool "Enable call thunks and call depth tracking debugging"

arch/x86/coco/tdx/tdx.c

Lines changed: 116 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,32 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
7878
panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
7979
}
8080

81+
/* Read TD-scoped metadata */
82+
static inline u64 tdg_vm_rd(u64 field, u64 *value)
83+
{
84+
struct tdx_module_args args = {
85+
.rdx = field,
86+
};
87+
u64 ret;
88+
89+
ret = __tdcall_ret(TDG_VM_RD, &args);
90+
*value = args.r8;
91+
92+
return ret;
93+
}
94+
95+
/* Write TD-scoped metadata */
96+
static inline u64 tdg_vm_wr(u64 field, u64 value, u64 mask)
97+
{
98+
struct tdx_module_args args = {
99+
.rdx = field,
100+
.r8 = value,
101+
.r9 = mask,
102+
};
103+
104+
return __tdcall(TDG_VM_WR, &args);
105+
}
106+
81107
/**
82108
* tdx_mcall_get_report0() - Wrapper to get TDREPORT0 (a.k.a. TDREPORT
83109
* subtype 0) using TDG.MR.REPORT TDCALL.
@@ -168,7 +194,87 @@ static void __noreturn tdx_panic(const char *msg)
168194
__tdx_hypercall(&args);
169195
}
170196

171-
static void tdx_parse_tdinfo(u64 *cc_mask)
197+
/*
198+
* The kernel cannot handle #VEs when accessing normal kernel memory. Ensure
199+
* that no #VE will be delivered for accesses to TD-private memory.
200+
*
201+
* TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
202+
* controls if the guest will receive such #VE with TD attribute
203+
* ATTR_SEPT_VE_DISABLE.
204+
*
205+
* Newer TDX modules allow the guest to control if it wants to receive SEPT
206+
* violation #VEs.
207+
*
208+
* Check if the feature is available and disable SEPT #VE if possible.
209+
*
210+
* If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
211+
* attribute is no longer reliable. It reflects the initial state of the
212+
* control for the TD, but it will not be updated if someone (e.g. bootloader)
213+
* changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
214+
* determine if SEPT #VEs are enabled or disabled.
215+
*/
216+
static void disable_sept_ve(u64 td_attr)
217+
{
218+
const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
219+
bool debug = td_attr & ATTR_DEBUG;
220+
u64 config, controls;
221+
222+
/* Is this TD allowed to disable SEPT #VE */
223+
tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
224+
if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
225+
/* No SEPT #VE controls for the guest: check the attribute */
226+
if (td_attr & ATTR_SEPT_VE_DISABLE)
227+
return;
228+
229+
/* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
230+
if (debug)
231+
pr_warn("%s\n", msg);
232+
else
233+
tdx_panic(msg);
234+
return;
235+
}
236+
237+
/* Check if SEPT #VE has been disabled before us */
238+
tdg_vm_rd(TDCS_TD_CTLS, &controls);
239+
if (controls & TD_CTLS_PENDING_VE_DISABLE)
240+
return;
241+
242+
/* Keep #VEs enabled for splats in debugging environments */
243+
if (debug)
244+
return;
245+
246+
/* Disable SEPT #VEs */
247+
tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE,
248+
TD_CTLS_PENDING_VE_DISABLE);
249+
}
250+
251+
/*
252+
* TDX 1.0 generates a #VE when accessing topology-related CPUID leafs (0xB and
253+
* 0x1F) and the X2APIC_APICID MSR. The kernel returns all zeros on CPUID #VEs.
254+
* In practice, this means that the kernel can only boot with a plain topology.
255+
* Any complications will cause problems.
256+
*
257+
* The ENUM_TOPOLOGY feature allows the VMM to provide topology information.
258+
* Enabling the feature eliminates topology-related #VEs: the TDX module
259+
* virtualizes accesses to the CPUID leafs and the MSR.
260+
*
261+
* Enable ENUM_TOPOLOGY if it is available.
262+
*/
263+
static void enable_cpu_topology_enumeration(void)
264+
{
265+
u64 configured;
266+
267+
/* Has the VMM provided a valid topology configuration? */
268+
tdg_vm_rd(TDCS_TOPOLOGY_ENUM_CONFIGURED, &configured);
269+
if (!configured) {
270+
pr_err("VMM did not configure X2APIC_IDs properly\n");
271+
return;
272+
}
273+
274+
tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_ENUM_TOPOLOGY, TD_CTLS_ENUM_TOPOLOGY);
275+
}
276+
277+
static void tdx_setup(u64 *cc_mask)
172278
{
173279
struct tdx_module_args args = {};
174280
unsigned int gpa_width;
@@ -193,21 +299,13 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
193299
gpa_width = args.rcx & GENMASK(5, 0);
194300
*cc_mask = BIT_ULL(gpa_width - 1);
195301

196-
/*
197-
* The kernel can not handle #VE's when accessing normal kernel
198-
* memory. Ensure that no #VE will be delivered for accesses to
199-
* TD-private memory. Only VMM-shared memory (MMIO) will #VE.
200-
*/
201302
td_attr = args.rdx;
202-
if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
203-
const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
204303

205-
/* Relax SEPT_VE_DISABLE check for debug TD. */
206-
if (td_attr & ATTR_DEBUG)
207-
pr_warn("%s\n", msg);
208-
else
209-
tdx_panic(msg);
210-
}
304+
/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
305+
tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
306+
307+
disable_sept_ve(td_attr);
308+
enable_cpu_topology_enumeration();
211309
}
212310

213311
/*
@@ -929,10 +1027,6 @@ static void tdx_kexec_finish(void)
9291027

9301028
void __init tdx_early_init(void)
9311029
{
932-
struct tdx_module_args args = {
933-
.rdx = TDCS_NOTIFY_ENABLES,
934-
.r9 = -1ULL,
935-
};
9361030
u64 cc_mask;
9371031
u32 eax, sig[3];
9381032

@@ -947,11 +1041,11 @@ void __init tdx_early_init(void)
9471041
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
9481042

9491043
cc_vendor = CC_VENDOR_INTEL;
950-
tdx_parse_tdinfo(&cc_mask);
951-
cc_set_mask(cc_mask);
9521044

953-
/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
954-
tdcall(TDG_VM_WR, &args);
1045+
/* Configure the TD */
1046+
tdx_setup(&cc_mask);
1047+
1048+
cc_set_mask(cc_mask);
9551049

9561050
/*
9571051
* All bits above GPA width are reserved and kernel treats shared bit

arch/x86/include/asm/cpuid.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#ifndef _ASM_X86_CPUID_H
77
#define _ASM_X86_CPUID_H
88

9+
#include <linux/types.h>
10+
911
#include <asm/string.h>
1012

1113
struct cpuid_regs {
@@ -20,11 +22,11 @@ enum cpuid_regs_idx {
2022
};
2123

2224
#ifdef CONFIG_X86_32
23-
extern int have_cpuid_p(void);
25+
bool have_cpuid_p(void);
2426
#else
25-
static inline int have_cpuid_p(void)
27+
static inline bool have_cpuid_p(void)
2628
{
27-
return 1;
29+
return true;
2830
}
2931
#endif
3032
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,

arch/x86/include/asm/shared/tdx.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,21 @@
1616
#define TDG_VP_VEINFO_GET 3
1717
#define TDG_MR_REPORT 4
1818
#define TDG_MEM_PAGE_ACCEPT 6
19+
#define TDG_VM_RD 7
1920
#define TDG_VM_WR 8
2021

21-
/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
22+
/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
23+
#define TDCS_CONFIG_FLAGS 0x1110000300000016
24+
#define TDCS_TD_CTLS 0x1110000300000017
2225
#define TDCS_NOTIFY_ENABLES 0x9100000000000010
26+
#define TDCS_TOPOLOGY_ENUM_CONFIGURED 0x9100000000000019
27+
28+
/* TDCS_CONFIG_FLAGS bits */
29+
#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
30+
31+
/* TDCS_TD_CTLS bits */
32+
#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(0)
33+
#define TD_CTLS_ENUM_TOPOLOGY BIT_ULL(1)
2334

2435
/* TDX hypercall Leaf IDs */
2536
#define TDVMCALL_MAP_GPA 0x10001

arch/x86/kernel/cpu/common.c

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -276,21 +276,13 @@ static int __init x86_noinvpcid_setup(char *s)
276276
}
277277
early_param("noinvpcid", x86_noinvpcid_setup);
278278

279-
#ifdef CONFIG_X86_32
280-
static int cachesize_override = -1;
281-
static int disable_x86_serial_nr = 1;
282-
283-
static int __init cachesize_setup(char *str)
284-
{
285-
get_option(&str, &cachesize_override);
286-
return 1;
287-
}
288-
__setup("cachesize=", cachesize_setup);
289-
290279
/* Standard macro to see if a specific flag is changeable */
291-
static inline int flag_is_changeable_p(u32 flag)
280+
static inline bool flag_is_changeable_p(unsigned long flag)
292281
{
293-
u32 f1, f2;
282+
unsigned long f1, f2;
283+
284+
if (!IS_ENABLED(CONFIG_X86_32))
285+
return true;
294286

295287
/*
296288
* Cyrix and IDT cpus allow disabling of CPUID
@@ -313,11 +305,22 @@ static inline int flag_is_changeable_p(u32 flag)
313305
: "=&r" (f1), "=&r" (f2)
314306
: "ir" (flag));
315307

316-
return ((f1^f2) & flag) != 0;
308+
return (f1 ^ f2) & flag;
317309
}
318310

311+
#ifdef CONFIG_X86_32
312+
static int cachesize_override = -1;
313+
static int disable_x86_serial_nr = 1;
314+
315+
static int __init cachesize_setup(char *str)
316+
{
317+
get_option(&str, &cachesize_override);
318+
return 1;
319+
}
320+
__setup("cachesize=", cachesize_setup);
321+
319322
/* Probe for the CPUID instruction */
320-
int have_cpuid_p(void)
323+
bool have_cpuid_p(void)
321324
{
322325
return flag_is_changeable_p(X86_EFLAGS_ID);
323326
}
@@ -349,10 +352,6 @@ static int __init x86_serial_nr_setup(char *s)
349352
}
350353
__setup("serialnumber", x86_serial_nr_setup);
351354
#else
352-
static inline int flag_is_changeable_p(u32 flag)
353-
{
354-
return 1;
355-
}
356355
static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
357356
{
358357
}
@@ -1088,7 +1087,6 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
10881087

10891088
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
10901089
{
1091-
#ifdef CONFIG_X86_32
10921090
int i;
10931091

10941092
/*
@@ -1109,7 +1107,6 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
11091107
break;
11101108
}
11111109
}
1112-
#endif
11131110
}
11141111

11151112
#define NO_SPECULATION BIT(0)

arch/x86/kernel/cpu/sgx/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
630630
if (!section->virt_addr)
631631
return false;
632632

633-
section->pages = vmalloc(nr_pages * sizeof(struct sgx_epc_page));
633+
section->pages = vmalloc_array(nr_pages, sizeof(struct sgx_epc_page));
634634
if (!section->pages) {
635635
memunmap(section->virt_addr);
636636
return false;

0 commit comments

Comments
 (0)