Skip to content

Commit

Permalink
Add a mode to record total time spent in functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
nelhage committed May 29, 2010
1 parent e7ee2ed commit ac10c55
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 24 deletions.
11 changes: 4 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
udis86=/home/nelhage/sw/udis86
CFLAGS=-g
SO_LDFLAGS=-fPIC -shared

LIBS=-lbfd -L$(udis86)/lib -ludis86
CFLAGS=-I$(udis86)/include -g
SO_LDFLAGS=-nostdlib -fPIC -shared

all: replace-ret preload.so
all: preload.so

%: %.c
$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $< $(LIBS) -o $@

preload.so: preload.c mcount.o
preload.so: preload.c
$(CC) $(CFLAGS) $(SO_LDFLAGS) $< -o $@
183 changes: 166 additions & 17 deletions preload.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <stdio.h>
#include <ucontext.h>
#include <stdlib.h>
#include <string.h>

struct trace_ent {
unsigned long ip;
Expand All @@ -14,40 +15,188 @@ struct trace_ent {

static struct trace_ent *trace_buf;
static struct trace_ent *next;
static int trace_fd;
static int trace_fd = -1;

void flush_trace_buf() {
write(trace_fd, trace_buf, TRACE_BUF_SIZE * sizeof *trace_buf);
struct function_record {
unsigned long ip;
int calls;
int depth;
struct timeval entry;
struct timeval elapsed;
struct function_record *hash_next;
};

struct profile_entry {
unsigned long ip;
unsigned long calls;
unsigned long elapsed_sec;
unsigned long elapsed_usec;
} __attribute__((packed));

#define HASH_SIZE 4096
#define hash_ip(ip) ((ip) % HASH_SIZE)

static struct function_record **fn_hash;

static struct function_record *lookup_record(unsigned long ip) {
struct function_record *f = fn_hash[hash_ip(ip)];
while (f && f->ip != ip) f = f->hash_next;
return f;
}

static struct function_record *new_record(unsigned long ip) {
struct function_record *r = lookup_record(ip);

if (r) return r;

r = malloc(sizeof *r);
memset(r, 0, sizeof *r);

r->ip = ip;
r->hash_next = fn_hash[hash_ip(ip)];
fn_hash[hash_ip(ip)] = r;
return r;
}

static void flush_trace_buf() {
write(trace_fd, trace_buf, (next - trace_buf) * sizeof *trace_buf);
next = trace_buf;
}

void add_trace(int entry, unsigned long ip) __attribute__((regparm(3)));
void add_trace(int entry, unsigned long ip) {
static void add_trace(int entry, unsigned long ip, struct timeval *tv) {
if (!next)
return;
next->ip = (ip << 1) | !!entry;
gettimeofday(&next->tv, NULL);
next->tv = *tv;

if(++next == trace_buf + TRACE_BUF_SIZE)
flush_trace_buf();
}

/* Subtract the `struct timeval' values X and Y,
storing the result in RESULT.
Return 1 if the difference is negative, otherwise 0. */
int
timeval_subtract (result, x, y)
struct timeval *result, *x, *y;
{
/* Perform the carry for the later subtraction by updating y. */
if (x->tv_usec < y->tv_usec) {
int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
y->tv_usec -= 1000000 * nsec;
y->tv_sec += nsec;
}
if (x->tv_usec - y->tv_usec > 1000000) {
int nsec = (x->tv_usec - y->tv_usec) / 1000000;
y->tv_usec += 1000000 * nsec;
y->tv_sec -= nsec;
}

/* Compute the time remaining to wait.
tv_usec is certainly positive. */
result->tv_sec = x->tv_sec - y->tv_sec;
result->tv_usec = x->tv_usec - y->tv_usec;

/* Return 1 if result is negative. */
return x->tv_sec < y->tv_sec;
}

int timeval_add(struct timeval *res, const struct timeval *x, const struct timeval *y)
{
res->tv_sec = x->tv_sec+y->tv_sec;

res->tv_usec = x->tv_usec + y->tv_usec;

while( res->tv_usec > 1000000 ){
res->tv_usec -= 1000000;
res->tv_sec++;
}

return 0;
}

void accrue_time(struct function_record *r, struct timeval *tv) {
struct timeval t1, t2;
timeval_subtract(&t1, tv, &r->entry);
timeval_add(&t2, &t1, &r->elapsed);
r->elapsed = t2;
}

static void log_times(void) {
struct function_record *r;
struct profile_entry prof;
int fd, i;

fd = open("langer.prof", O_WRONLY|O_TRUNC|O_CREAT, 0644);
if (fd < 0) {
perror("open(langer.prof)");
return;
}

for (i = 0; i < HASH_SIZE; i++) {
r = fn_hash[i];
while (r) {
prof.ip = r->ip;
prof.calls = r->calls;
prof.elapsed_sec = r->elapsed.tv_sec;
prof.elapsed_usec = r->elapsed.tv_usec;
write(fd, &prof, sizeof prof);
r = r->hash_next;
}
}

close(fd);
}

void __cyg_profile_func_enter (void *this_fn, void *call_site) {
add_trace(1, (unsigned long)this_fn);
struct timeval tv;
struct function_record *r;
gettimeofday(&tv, NULL);
add_trace(1, (unsigned long)this_fn, &tv);

r = new_record((unsigned long)this_fn);
if (!r->depth)
r->entry = tv;
r->depth++;
r->calls++;
}

void __cyg_profile_func_exit (void *this_fn, void *call_site) {
add_trace(0, (unsigned long)this_fn);
struct timeval tv;
struct function_record *r;

gettimeofday(&tv, NULL);
add_trace(0, (unsigned long)this_fn, &tv);

r = new_record((unsigned long)this_fn);
r->depth--;
if (!r->depth) {
accrue_time(r, &tv);
}
}

void _init(void) {
trace_buf = malloc(TRACE_BUF_SIZE * sizeof *trace_buf);
next = trace_buf;
trace_fd = open("trace.out", O_WRONLY|O_TRUNC|O_CREAT, 0600);
if (trace_fd < 0) {
perror("opening trace.out");
exit(1);
void __attribute__((constructor)) start_profiling(void);
void start_profiling(void) {
const char *trace_file = getenv("LANGER_TRACE_ALL");
if (trace_file) {
trace_buf = malloc(TRACE_BUF_SIZE * sizeof *trace_buf);
next = trace_buf;
trace_fd = open(trace_file, O_WRONLY|O_TRUNC|O_CREAT, 0600);
if (trace_fd < 0) {
perror("opening trace file");
exit(1);
}
}

fn_hash = malloc(HASH_SIZE * sizeof *fn_hash);
memset(fn_hash, 0, HASH_SIZE * sizeof *fn_hash);
}

void _fini(void) {
close(trace_fd);
void __attribute__((destructor)) stop_profiling(void);
void stop_profiling(void) {
if (trace_fd > 0) {
flush_trace_buf();
close(trace_fd);
}
log_times();
}
45 changes: 45 additions & 0 deletions print-prof
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python
import struct
import sys
import subprocess

prof_entry = struct.Struct("@LLLL")

def usage():
print >>sys.stdout, "Usage: %s langer.prof prog" % (sys.argv[0],)

def main(args):
if len(args) < 2:
usage()
sys.exit(1)

trace_file = args[0]
exe = args[1]
f = open(trace_file, 'r')
spc = ''

p = subprocess.Popen(['addr2line', '-f', '-e', exe],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)

fns = []

while True:
buf = f.read(prof_entry.size)
if len(buf) < prof_entry.size:
break
(ip, calls, s, us) = prof_entry.unpack(buf)

p.stdin.write("0x%x\n" % (ip,))
fn = p.stdout.readline().strip()
line = p.stdout.readline().strip()

fns.append((fn, line, ip, calls, s + 1e-6*us))

fns.sort(key=lambda e:e[4], reverse=True)

for (fn,line,ip,calls,t) in fns:
print "[%s] %s (%d calls)" % (fn, t, calls)

if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))

0 comments on commit ac10c55

Please sign in to comment.