-
Notifications
You must be signed in to change notification settings - Fork 18
/
drsnoop
executable file
·219 lines (197 loc) · 6.58 KB
/
drsnoop
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#!/usr/bin/env bash
#
# drsnoop - trace direct reclaim latency.
# Written using Linux ftrace.
#
# This traces direct reclaim at the vmscan interface, using the vmscan:
# tracepoints. This can help characterize the direct reclaim requested
# and their resulting performance.
#
# USAGE: ./drsnoop [-hst] [-p pid] [-n name] [duration]
#
# Run "drsnoop -h" for full usage.
#
# REQUIREMENTS: FTRACE CONFIG, vmscan:mm_vmscan_direct_reclaim_* tracepoints
# (you may already have these on recent kernels).
#
# OVERHEAD: By default, drsnoop works without buffering, printing direct reclaim
# events as they happen (uses trace_pipe).
#
# This was written as a proof of concept for ftrace. It would be better written
# using perf_events (after some capabilities are added), which has a better
# buffering policy, or a tracer such as SystemTap or ktap.
#
# 19-Jun-2018 Ethercflow Created this.
### default variables
tracing=/sys/kernel/debug/tracing
flock=/var/tmp/.ftrace-lock
bufsize_kb=4096
opt_duration=0; duration=; opt_name=0; name=; opt_pid=0; pid=; ftext=
opt_start=0; opt_end=0;
trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section
function usage {
cat <<-END >&2
USAGE: drsnoop [-hst] [-p PID] [-n name]
[duration]
-n name # process name to match on direct reclaim latency
-p PID # PID to match on direct reclaim latency
-s # include start time of direct reclaim (s)
-t # include end time of direct reclaim (s)
-h # this usage message
duration # duration seconds, and use buffers
eg,
drsnoop # watch direct reclaim latency live (unbuffered)
drsnoop 1 # trace 1 sec (buffered)
drsnoop -ts # include start and end timestamps
drsnoop -p 91 # show direct reclaim latency when PID 91 is on-CPU
END
exit
}
function warn {
if ! eval "$@"; then
echo >&2 "WARNING: command failed \"$@\""
fi
}
function end {
# disable tracing
echo 2>/dev/null
echo "Ending tracing..." 2>/dev/null
cd $tracing
warn "echo 0 > events/vmscan/mm_vmscan_direct_reclaim_begin/enable"
warn "echo 0 > events/vmscan/mm_vmscan_direct_reclaim_end/enable"
if (( opt_pid )); then
warn "echo 0 > events/vmscan/mm_vmscan_direct_reclaim_begin/filter"
warn "echo 0 > events/vmscan/mm_vmscan_direct_reclaim_end/filter"
fi
warn "echo > trace"
(( wroteflock )) && warn "rm $flock"
}
function die {
echo >&2 "$@"
exit 1
}
function edie {
# die with a quiet end()
echo >&2 "$@"
exec >/dev/null 2>&1
end
exit 1
}
### process options
while getopts h:n:p:st opt
do
case $opt in
n) opt_name=1; name=$OPTARG ;;
p) opt_pid=1; pid=$OPTARG ;;
s) opt_start=1 ;;
t) opt_end=1 ;;
h|?) usage ;;
esac
done
shift $(( $OPTIND - 1 ))
if (( $# )); then
opt_duration=1
duration=$1
shift
fi
### option logic
(( opt_pid && opt_name )) && die "ERROR: use either -p or -n."
(( opt_pid )) && ftext=" issued by PID $pid"
(( opt_name )) && ftext=" issued by process name \"$name\""
if (( opt_duration )); then
echo "Tracing vmscan direct reclaim$ftext for $duration seconds (buffered)..."
else
echo "Tracing vmscan direct reclaim$ftext. Ctrl-C to end."
fi
### select awk
(( opt_duration )) && use=mawk || use=gawk # workaround for mawk fflush()
[[ -x /usr/bin/$use ]] && awk=$use || awk=awk
wroteflock=1
### check permissions
cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE?
debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"
### ftrace lock
[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock"
echo $$ > $flock || die "ERROR: unable to write $flock."
### setup and begin tracing
echo nop > current_tracer
warn "echo $bufsize_kb > buffer_size_kb"
# TODO: support filter
if ! echo 1 > events/vmscan/mm_vmscan_direct_reclaim_begin/enable || \
! echo 1 > events/vmscan/mm_vmscan_direct_reclaim_end/enable; then
edie "ERROR: enabling vmscan direct reclaim tracepoints. Exiting."
fi
printf "%-12.12s %-6s" "COMM" "PID"
(( opt_start )) && printf "%-15s " "START"
(( opt_end )) && printf "%-15s " "END"
printf "%-8s %6s \n" "LATms" "PAGES"
#
# Determine output format. It may be one of the following (newest first):
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# TASK-PID CPU# TIMESTAMP FUNCTION
# To differentiate between them, the number of header fields is counted,
# and an offset set, to skip the extra column when needed.
#
offset=$($awk 'BEGIN { o = 0; }
$1 == "#" && $2 ~ /TASK/ && NF == 6 { o = 1; }
$2 ~ /TASK/ { print o; exit }' trace)
### print trace buffer
warn "echo > trace"
( if (( opt_duration )); then
# wait then dump buffer
sleep $duration
cat trace
else
# print buffer live
cat trace_pipe
fi ) | $awk -v o=$offset -v opt_name=$opt_name -v name=$name \
-v opt_duration=$opt_duration -v opt_start=$opt_start -v opt_end=$opt_end '
# common fields
$1 != "#" {
# task name can contain dashes
comm = pid = $1
sub(/-[0-9][0-9]*/, "", comm)
sub(/.*-/, "", pid)
time = $(3+o); sub(":", "", time)
}
# mm_vmscan_direct_reclaim_begin
$1 != "#" && $0 ~ /mm_vmscan_direct_reclaim_begin/ {
if (opt_name && match(comm, name) == 0)
next
#
# example: comm-pid [cpu] .... TIMESTAMP: mm_vmscan_direct_reclaim_begin:
#
starts[$1] = time
comms[$1] = comm
pids[$1] = pid
next
}
# mm_vmscan_direct_reclaim_end
$1 != "#" && $0 ~ /mm_vmscan_direct_reclaim_end/ {
#
# example: comm-pid [cpu] .... TIMESTAMP: mm_vmscan_direct_reclaim_end: nr_reclaimed=32
#
if (starts[$1] > 0) {
latency = sprintf("%.2f",
1000 * (time - starts[$1]))
nr_reclaimed = $(5+o); sub(/.*=/, "", nr_reclaimed)
comm = comms[$1]
pid = pids[$1]
printf "%-12.12s %-6s", comm, pid
if (opt_start)
printf "%-15s ", starts[$1]
if (opt_end)
printf "%-15s ", time
printf "%-8s %6s \n", latency, nr_reclaimed
if (!opt_duration)
fflush()
delete starts[$1]
delete comms[$1]
delete pids[$1]
}
next
}
$0 ~ /LOST.*EVENTS/ { print "WARNING: " $0 > "/dev/stderr" }
'
### end tracing
end