Skip to content

Commit b30ba3d

Browse files
lauxinwjren1
authored and committed
tools:acrn-crashlog: Detect and classify the crash in ACRN and kernel
Since ACRN has the capability to reboot and the reboot reason is available in SOS, acrnprobe can detect crashes of the ACRN hypervisor and of the SOS kernel.
List of added crash types:
1. ACRNCRASH - crashed in the hypervisor; this detection depends on the acrnlog_last files under /tmp/acrnlog (provided by acrnlog).
2. IPANIC - crashed in the SOS kernel; this detection depends on pstore.
3. SWWDT_IPANIC - crashed in the SOS kernel and the reboot reason is wdt.
4. HWWDT_UNHANDLE - only the reboot reason (global) is recognized; there are no further clues as to whether it was a SOS kernel crash or a hypervisor crash.
5. SWWDT_UNHANDLE - only the reboot reason (wdt) is recognized; there are no further clues as to whether it was a SOS kernel crash or a hypervisor crash.
6. UNKNOWN - only the reboot reason (warm) is recognized; there are no further clues as to whether it was a SOS kernel crash or a hypervisor crash.
Signed-off-by: Liu, Xinwu <xinwu.liu@intel.com>
Acked-by: Chen Gang <gang.c.chen@intel.com>
1 parent a5853d6 commit b30ba3d

File tree

5 files changed

+138
-33
lines changed

5 files changed

+138
-33
lines changed

tools/acrn-crashlog/acrnprobe/channels.c

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include "fsutils.h"
2121
#include "strutils.h"
2222
#include "channels.h"
23+
#include "startupreason.h"
24+
#include "probeutils.h"
2325
#include "log_sys.h"
2426

2527
#define POLLING_TIMER_SIG 0xCEAC
@@ -105,25 +107,40 @@ static void channel_oneshot(struct channel_t *cnl)
105107

106108
LOGD("initializing channel %s ...\n", cname);
107109

110+
if (!is_boot_id_changed())
111+
return;
112+
108113
e = create_event(REBOOT, cname, NULL, 0, NULL);
109114
if (e)
110115
event_enqueue(e);
111116

112-
113117
for_each_crash(id, crash, conf) {
114118
if (!crash || !is_root_crash(crash))
115119
continue;
116120

117121
if (strcmp(crash->channel, cname))
118122
continue;
119123

120-
if (crash->trigger &&
121-
!strcmp("file", crash->trigger->type) &&
122-
file_exists(crash->trigger->path)) {
123-
e = create_event(CRASH, cname, (void *)crash,
124-
0, crash->trigger->path);
125-
if (e)
126-
event_enqueue(e);
124+
if (!crash->trigger)
125+
continue;
126+
127+
if (!strcmp("file", crash->trigger->type)) {
128+
if (file_exists(crash->trigger->path)) {
129+
e = create_event(CRASH, cname, (void *)crash,
130+
0, crash->trigger->path);
131+
if (e)
132+
event_enqueue(e);
133+
}
134+
} else if (!strcmp("rebootreason", crash->trigger->type)) {
135+
char rreason[REBOOT_REASON_SIZE];
136+
137+
read_startupreason(rreason, sizeof(rreason));
138+
if (!strcmp(rreason, crash->content[0])) {
139+
e = create_event(CRASH, cname, (void *)crash,
140+
0, crash->trigger->path);
141+
if (e)
142+
event_enqueue(e);
143+
}
127144
}
128145
}
129146

tools/acrn-crashlog/acrnprobe/include/probeutils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,6 @@ void generate_crashfile(char *dir, char *event, char *hashkey,
4040
char *type, char *data0,
4141
char *data1, char *data2);
4242
char *generate_log_dir(enum e_dir_mode mode, char *hashkey);
43+
int is_boot_id_changed(void);
4344

4445
#endif

tools/acrn-crashlog/acrnprobe/probeutils.c

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,21 @@
3838
#define STATS_CURRENT_LOG "currentstatslog"
3939
#define VM_CURRENT_LOG "currentvmlog"
4040

41+
#define BOOTID_NODE "/proc/sys/kernel/random/boot_id"
42+
#define BOOTID_LOG "currentbootid"
43+
4144
unsigned long long get_uptime(void)
4245
{
43-
static long long time_ns = -1;
44-
struct timespec ts;
46+
long long time_ns;
47+
struct timespec ts;
48+
int res;
4549

46-
clock_gettime(CLOCK_BOOTTIME, &ts);
47-
time_ns = (long long)ts.tv_sec * 1000000000LL +
48-
(long long)ts.tv_nsec;
50+
res = clock_gettime(CLOCK_BOOTTIME, &ts);
51+
if (res == -1)
52+
return res;
53+
54+
time_ns = (long long)ts.tv_sec * 1000000000LL +
55+
(long long)ts.tv_nsec;
4956

5057
return time_ns;
5158
}
@@ -56,6 +63,8 @@ int get_uptime_string(char *newuptime, int *hours)
5663
int seconds, minutes;
5764

5865
tm = get_uptime();
66+
if (tm == -1)
67+
return -1;
5968

6069
/* seconds */
6170
*hours = (int)(tm / 1000000000LL);
@@ -444,3 +453,41 @@ char *generate_log_dir(enum e_dir_mode mode, char *hashkey)
444453

445454
return strdup(path);
446455
}
456+
457+
int is_boot_id_changed(void)
458+
{
459+
void *boot_id;
460+
void *logged_boot_id;
461+
char logged_boot_id_path[PATH_MAX];
462+
unsigned long size;
463+
struct sender_t *crashlog;
464+
int res;
465+
int result = 1; /* returns changed by default */
466+
467+
crashlog = get_sender_by_name("crashlog");
468+
if (!crashlog)
469+
return result;
470+
471+
res = read_file(BOOTID_NODE, &size, &boot_id);
472+
if (res == -1)
473+
return result;
474+
475+
snprintf(logged_boot_id_path, sizeof(logged_boot_id_path), "%s/%s",
476+
crashlog->outdir, BOOTID_LOG);
477+
if (file_exists(logged_boot_id_path)) {
478+
res = read_file(logged_boot_id_path, &size, &logged_boot_id);
479+
if (res == -1)
480+
goto out;
481+
482+
if (!strcmp((char *)logged_boot_id, (char *)boot_id))
483+
result = 0;
484+
485+
free(logged_boot_id);
486+
}
487+
488+
if (result)
489+
overwrite_file(logged_boot_id_path, boot_id);
490+
out:
491+
free(boot_id);
492+
return result;
493+
}

tools/acrn-crashlog/data/acrnprobe.service

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Description=ACRN crashlog probe
33
Requires=telemd.socket
44
Requires=usercrash_s
5+
After=acrnlog.service
56
After=usercrash.service
67
After=prepare.service
78

tools/acrn-crashlog/data/acrnprobe.xml

Lines changed: 59 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
<triggers>
2828
<trigger id="1" enable="true">
2929
<name>t_pstore</name>
30-
<type>file</type>
31-
<path>/sys/fs/pstore/console-ramoops</path>
30+
<type>node</type>
31+
<path>/sys/fs/pstore/console-ramoops-0</path>
3232
</trigger>
3333
<trigger id="2" enable="true">
3434
<name>t_boot</name>
@@ -40,6 +40,15 @@
4040
<type>dir</type>
4141
<path>/var/log/usercrashes</path>
4242
</trigger>
43+
<trigger id="4" enable="true">
44+
<name>t_rebootreason</name>
45+
<type>rebootreason</type>
46+
</trigger>
47+
<trigger id="5" enable="true">
48+
<name>t_acrnlog_last</name>
49+
<type>file</type>
50+
<path>/tmp/acrnlog/acrnlog_last.[*]</path>
51+
</trigger>
4352
</triggers>
4453

4554
<vms enable="true">
@@ -57,8 +66,8 @@
5766
<logs>
5867
<log id="1" enable="true">
5968
<name>pstore</name>
60-
<type>file</type>
61-
<path>/sys/fs/pstore/console-ramoops</path>
69+
<type>node</type>
70+
<path>/sys/fs/pstore/console-ramoops-0</path>
6271
</log>
6372
<log id='2' enable='true'>
6473
<name>kmsg</name>
@@ -90,34 +99,66 @@
9099

91100
<crashes>
92101
<crash id='1' inherit='0' enable='true'>
93-
<name>IPANIC</name>
94-
<trigger>t_pstore</trigger>
102+
<name>UNKNOWN</name>
103+
<trigger>t_rebootreason</trigger>
95104
<channel>oneshot</channel>
105+
<content id='1'>WARM</content>
96106
<log id='1'>pstore</log>
107+
<log id='2'>acrnlog_last</log>
108+
</crash>
109+
<crash id='2' inherit='0' enable='true'>
110+
<name>SWWDT_UNHANDLE</name>
111+
<trigger>t_rebootreason</trigger>
112+
<channel>oneshot</channel>
113+
<content id='1'>WATCHDOG</content>
114+
<log id='1'>pstore</log>
115+
<log id='2'>acrnlog_last</log>
116+
</crash>
117+
<crash id='3' inherit='0' enable='true'>
118+
<name>HWWDT_UNHANDLE</name>
119+
<trigger>t_rebootreason</trigger>
120+
<channel>oneshot</channel>
121+
<content id='1'>GLOBAL</content>
122+
<log id='1'>pstore</log>
123+
<log id='2'>acrnlog_last</log>
124+
</crash>
125+
<crash id='4' inherit='1' enable='true'>
126+
<name>ACRNCRASH</name>
127+
<trigger>t_acrnlog_last</trigger>
128+
<content id='1'>= Unhandled exception:</content>
129+
</crash>
130+
<crash id='5' inherit='1' enable='true'>
131+
<name>IPANIC</name>
132+
<trigger>t_pstore</trigger>
133+
<content id='1'> </content>
134+
<mightcontent expression='1' id='1'>Kernel panic - not syncing:</mightcontent>
135+
<mightcontent expression='1' id='2'>BUG: unable to handle kernel</mightcontent>
97136
<data id='1'>kernel BUG at</data>
98137
<data id='2'>EIP is at</data>
99138
<data id='3'>Comm:</data>
100139
</crash>
101-
<crash id='2' inherit='1' enable='true'>
102-
<name>IPANIC_SWWDT</name>
103-
<content id='1'>BUG: soft lockup - CPU#</content>
140+
<crash id='6' inherit='2' enable='true'>
141+
<name>ACRNCRASH</name>
142+
<trigger>t_acrnlog_last</trigger>
143+
<content id='1'>= Unhandled exception:</content>
104144
</crash>
105-
<crash id='3' inherit='2' enable='true'>
106-
<name>IPANIC_SWWDT_FAKE</name>
107-
<mightcontent expression='1' id='1'>EIP: panic_dbg_set</mightcontent>
108-
<mightcontent expression='1' id='2'>RIP: panic_dbg_set</mightcontent>
145+
<crash id='7' inherit='2' enable='true'>
146+
<name>SWWDT_IPANIC</name>
147+
<trigger>t_pstore</trigger>
148+
<content id='1'> </content>
149+
<mightcontent expression='1' id='1'>Kernel panic - not syncing:</mightcontent>
150+
<mightcontent expression='1' id='2'>BUG: unable to handle kernel</mightcontent>
151+
<data id='1'>kernel BUG at</data>
152+
<data id='2'>EIP is at</data>
153+
<data id='3'>Comm:</data>
109154
</crash>
110-
<crash id='4' inherit='0' enable='true'>
155+
<crash id='8' inherit='0' enable='true'>
111156
<name>USERCRASH</name>
112157
<trigger>t_usercrash</trigger>
113158
<channel>inotify</channel>
114159
<log id='1'>kmsg</log>
115160
<log id='2'>syslog</log>
116161
</crash>
117-
<crash id='5' inherit='1' enable='true'>
118-
<name>IPANIC_HWWDT</name>
119-
<content id='1'>Watchdog detected hard LOCKUP on cpu</content>
120-
</crash>
121162
</crashes>
122163

123164
<infos>
@@ -132,6 +173,4 @@
132173
</info>
133174
</infos>
134175

135-
136-
137176
</conf>

0 commit comments

Comments
 (0)