Skip to content

Commit 99ed546

Browse files
mgcaolijinxia
authored andcommitted
DM: add a thread to monitor UOS ptdev intr status
This patch is for "interrupt storm mitigation", used to reduce the effect on SOS if an "interrupt storm" happens in UOS. Add a monitor thread to get interrupt frequency data for UOS pass-through devices; currently, if an "interrupt storm" happens, it will send a command to delay interrupt injection to UOS for some time. The parameters (interrupt storm threshold and delay time) can be adjusted according to different HW configurations and use cases. Tracked-On: #866 Signed-off-by: Minggui Cao <minggui.cao@intel.com> Reviewed-by: Yin Fengwei <fengwei.yin@intel.com> Acked-by: Anthony Xu <anthony.xu@intel.com>
1 parent d123083 commit 99ed546

File tree

5 files changed

+184
-0
lines changed

5 files changed

+184
-0
lines changed

devicemodel/core/monitor.c

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,163 @@
1515
#include <string.h>
1616
#include <sys/stat.h>
1717
#include <sys/queue.h>
18+
#include <unistd.h>
1819
#include <pthread.h>
1920
#include "dm.h"
2021
#include "monitor.h"
2122
#include "acrn_mngr.h"
2223
#include "pm.h"
24+
#include "vmmapi.h"
25+
26+
/* Interval between two interrupt-count samples; passed to sleep(), so in seconds. */
#define INTR_STORM_MONITOR_PERIOD 10 /* 10 seconds */
27+
/*
 * Per-IRQ count increase between two samples that is treated as a storm.
 * Samples are INTR_STORM_MONITOR_PERIOD seconds apart, so 100000 per period
 * corresponds to 10K interrupts per second.
 */
#define INTR_STORM_THRESHOLD 100000 /* 10K times per second */
28+
29+
/* Value written to buffer[0] of an INTR_CMD_DELAY_INT request to enable the delay. */
#define DELAY_INTR_TIME 1 /* 1ms */
30+
/* How long the delay stays enabled; passed to usleep(), so in microseconds. */
#define DELAY_DURATION 100000 /* 100ms of total duration for delay intr */
31+
/* Pause after a storm was handled before sampling again; passed to sleep(). */
#define TIME_TO_CHECK_AGAIN 2 /* 2 seconds */
32+
33+
/*
 * Request/response buffer handed to vm_intr_monitor(), padded to 4096 bytes
 * and aligned on a 4096-byte boundary.
 * NOTE(review): presumably this keeps the buffer within a single page for the
 * hypercall - confirm against the VHM/hypervisor side.
 */
union intr_monitor_t {
34+
struct acrn_intr_monitor monitor;
35+
char reserved[4096];
36+
} __aligned(4096);
37+
38+
/* The single monitor buffer used by the interrupt storm monitor thread. */
static union intr_monitor_t intr_data;
39+
/* Copy of the previous sample, used to compute per-IRQ deltas. */
static uint64_t intr_cnt_buf[MAX_PTDEV_NUM * 2];
40+
/* Handle of the monitor thread; 0 is used to mean "no thread running". */
static pthread_t intr_storm_monitor_pid;
41+
42+
/* debug switch: enable the define below only when debugging */
43+
/* #define INTR_MONITOR_DBG */
44+
45+
#ifdef INTR_MONITOR_DBG
46+
static FILE * dbg_file;
47+
#define DPRINTF(format, args...) \
48+
do { fprintf(dbg_file, format, args); fflush(dbg_file); } while (0)
49+
50+
/* this is a debug function */
51+
static void write_intr_data_to_file(const struct acrn_intr_monitor *hdr)
52+
{
53+
static int wr_cnt;
54+
int j;
55+
56+
wr_cnt++;
57+
fprintf(dbg_file, "\n==%d time devs=%d==\n", wr_cnt, hdr->buf_cnt / 2);
58+
fprintf(dbg_file, "IRQ\t\tCount\n");
59+
60+
for (j = 0; j < hdr->buf_cnt; j += 2) {
61+
if (hdr->buffer[j + 1] != 0) {
62+
fprintf(dbg_file, "%ld\t\t%ld\n", hdr->buffer[j],
63+
hdr->buffer[j + 1]);
64+
}
65+
}
66+
67+
fflush(dbg_file);
68+
}
69+
#else
70+
#define DPRINTF(format, arg...)
71+
#endif
72+
73+
static void *intr_storm_monitor_thread(void *arg)
74+
{
75+
struct vmctx *ctx = (struct vmctx *)arg;
76+
struct acrn_intr_monitor *hdr = &intr_data.monitor;
77+
uint64_t delta = 0UL;
78+
int ret, i;
79+
80+
#ifdef INTR_MONITOR_DBG
81+
dbg_file = fopen("/tmp/intr_log", "w+");
82+
#endif
83+
sleep(INTR_STORM_MONITOR_PERIOD);
84+
85+
/* first to get interrupt data */
86+
hdr->cmd = INTR_CMD_GET_DATA;
87+
hdr->buf_cnt = MAX_PTDEV_NUM * 2;
88+
memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
89+
90+
ret = vm_intr_monitor(ctx, hdr);
91+
if (ret) {
92+
DPRINTF("first get intr data failed, ret: %d\n", ret);
93+
intr_storm_monitor_pid = 0;
94+
return NULL;
95+
}
96+
97+
while (1) {
98+
#ifdef INTR_MONITOR_DBG
99+
write_intr_data_to_file(hdr);
100+
#endif
101+
memcpy(intr_cnt_buf, hdr->buffer,
102+
sizeof(uint64_t) * hdr->buf_cnt);
103+
sleep(INTR_STORM_MONITOR_PERIOD);
104+
105+
/* next time to get interrupt data */
106+
memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
107+
ret = vm_intr_monitor(ctx, hdr);
108+
if (ret) {
109+
DPRINTF("next get intr data failed, ret: %d\n", ret);
110+
intr_storm_monitor_pid = 0;
111+
break;
112+
}
113+
114+
/*
115+
* calc the delta of the two times count of interrupt;
116+
* compare the IRQ num first, if not same just drop it,
117+
* for it just happens rarelly when devices dynamically
118+
* allocation in SOS or UOS, it can be calc next time
119+
*/
120+
for (i = 0; i < hdr->buf_cnt; i += 2) {
121+
if (hdr->buffer[i] != intr_cnt_buf[i])
122+
continue;
123+
124+
delta = hdr->buffer[i + 1] - intr_cnt_buf[i + 1];
125+
if (delta > INTR_STORM_THRESHOLD) {
126+
#ifdef INTR_MONITOR_DBG
127+
write_intr_data_to_file(hdr);
128+
#endif
129+
break;
130+
}
131+
}
132+
133+
/* storm detected, handle the intr abnormal status */
134+
if (i < hdr->buf_cnt) {
135+
DPRINTF("irq=%ld, delta=%ld\n", intr_cnt_buf[i], delta);
136+
137+
hdr->cmd = INTR_CMD_DELAY_INT;
138+
hdr->buffer[0] = DELAY_INTR_TIME;
139+
vm_intr_monitor(ctx, hdr);
140+
usleep(DELAY_DURATION); /* sleep-delay intr */
141+
hdr->buffer[0] = 0; /* cancel to delay intr */
142+
vm_intr_monitor(ctx, hdr);
143+
144+
sleep(TIME_TO_CHECK_AGAIN); /* time to get data again */
145+
hdr->cmd = INTR_CMD_GET_DATA;
146+
hdr->buf_cnt = MAX_PTDEV_NUM * 2;
147+
memset(hdr->buffer, 0,
148+
sizeof(uint64_t) * hdr->buf_cnt);
149+
vm_intr_monitor(ctx, hdr);
150+
}
151+
}
152+
153+
return NULL;
154+
}
155+
156+
static void start_intr_storm_monitor(struct vmctx *ctx)
157+
{
158+
int ret = pthread_create(&intr_storm_monitor_pid, NULL,
159+
intr_storm_monitor_thread, ctx);
160+
if (ret) {
161+
printf("failed %s %d\n", __func__, __LINE__);
162+
intr_storm_monitor_pid = 0;
163+
}
164+
165+
printf("start monitor interrupt data...\n");
166+
}
167+
168+
static void stop_intr_storm_monitor(void)
169+
{
170+
if (intr_storm_monitor_pid) {
171+
pthread_cancel(intr_storm_monitor_pid);
172+
intr_storm_monitor_pid = 0;
173+
}
174+
}
23175

24176
/* helpers */
25177
/* Check if @path is a directory, and create if not exist */
@@ -255,6 +407,8 @@ int monitor_init(struct vmctx *ctx)
255407

256408
monitor_register_vm_ops(&pmc_ops, ctx, "PMC_VM_OPs");
257409

410+
start_intr_storm_monitor(ctx);
411+
258412
return 0;
259413

260414
handlers_err:
@@ -269,4 +423,6 @@ void monitor_close(void)
269423
{
270424
if (monitor_fd >= 0)
271425
mngr_close(monitor_fd);
426+
427+
stop_intr_storm_monitor();
272428
}

devicemodel/core/vmmapi.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,3 +651,9 @@ vm_get_cpu_state(struct vmctx *ctx, void *state_buf)
651651
{
652652
return ioctl(ctx->fd, IC_PM_GET_CPU_STATE, state_buf);
653653
}
654+
655+
int
656+
vm_intr_monitor(struct vmctx *ctx, void *intr_buf)
657+
{
658+
return ioctl(ctx->fd, IC_VM_INTR_MONITOR, intr_buf);
659+
}

devicemodel/include/public/acrn_common.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,26 @@ enum pm_cmd_type {
455455
PMCMD_GET_CX_DATA,
456456
};
457457

458+
/**
459+
* @brief Request/response data for monitoring a VM's interrupt counts
460+
*
461+
* The parameter for the HC_VM_INTR_MONITOR hypercall.
462+
*/
463+
/* Number of (IRQ, count) pairs that fit into acrn_intr_monitor.buffer. */
#define MAX_PTDEV_NUM 24
464+
struct acrn_intr_monitor {
465+
/** sub command for intr monitor, one of the INTR_CMD_* values */
466+
uint32_t cmd;
467+
/** number of uint64_t entries of buffer that are in use */
468+
uint32_t buf_cnt;
469+
470+
/**
 * Data area. For INTR_CMD_GET_DATA the device model reads it as
 * consecutive (IRQ number, interrupt count) pairs; for
 * INTR_CMD_DELAY_INT it writes the delay time to buffer[0], where 0
 * cancels the delay.
 */
471+
uint64_t buffer[MAX_PTDEV_NUM * 2];
472+
} __aligned(8);
473+
474+
/** Sub commands for acrn_intr_monitor.cmd */
475+
#define INTR_CMD_GET_DATA 0
476+
#define INTR_CMD_DELAY_INT 1
477+
458478
/**
459479
* @}
460480
*/

devicemodel/include/public/vhm_ioctl_defs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
#define IC_DEASSERT_IRQLINE _IC_ID(IC_ID, IC_ID_IRQ_BASE + 0x01)
8181
#define IC_PULSE_IRQLINE _IC_ID(IC_ID, IC_ID_IRQ_BASE + 0x02)
8282
#define IC_INJECT_MSI _IC_ID(IC_ID, IC_ID_IRQ_BASE + 0x03)
83+
#define IC_VM_INTR_MONITOR _IC_ID(IC_ID, IC_ID_IRQ_BASE + 0x04)
8384

8485
/* DM ioreq management */
8586
#define IC_ID_IOREQ_BASE 0x30UL

devicemodel/include/vmmapi.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ int vm_reset_ptdev_intx_info(struct vmctx *ctx, int virt_pin, bool pic_pin);
152152
int vm_create_vcpu(struct vmctx *ctx, uint16_t vcpu_id);
153153

154154
int vm_get_cpu_state(struct vmctx *ctx, void *state_buf);
155+
int vm_intr_monitor(struct vmctx *ctx, void *intr_buf);
155156
void vm_stop_watchdog(struct vmctx *ctx);
156157
void vm_reset_watchdog(struct vmctx *ctx);
157158
#endif /* _VMMAPI_H_ */

0 commit comments

Comments
 (0)