Skip to content
This repository has been archived by the owner on Nov 3, 2021. It is now read-only.

Commit

Permalink
x86 oprofile enablement
Browse files Browse the repository at this point in the history
  • Loading branch information
Bruce Beare committed Mar 4, 2010
1 parent 56b2728 commit 751a443
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 50 deletions.
2 changes: 0 additions & 2 deletions Android.mk
@@ -1,3 +1 @@
ifeq ($(TARGET_ARCH),arm)
include $(call all-subdir-makefiles)
endif
15 changes: 10 additions & 5 deletions libop/op_hw_specific.h
Expand Up @@ -20,7 +20,8 @@ static inline int cpuid_vendor(char *vnd)
char v[12];
} v;
unsigned eax;
asm("cpuid" : "=a" (eax), "=b" (v.b), "=c" (v.c), "=d" (v.d) : "0" (0));
asm volatile( "pushl %%ebx; cpuid; movl %%ebx, %1; popl %%ebx"
: "=a" (eax), "=S" (v.b), "=c" (v.c), "=d" (v.d) : "0" (0));
return !strncmp(v.v, vnd, 12);
}

Expand All @@ -46,7 +47,8 @@ static inline void workaround_nehalem_aaj79(unsigned *ebx)

if (!cpuid_vendor("GenuineIntel"))
return;
asm("cpuid" : "=a" (v.eax) : "0" (1) : "ecx","ebx","edx");
asm volatile( "pushl %%ebx; cpuid; movl %%ebx, %1; popl %%ebx"
: "=a" (v.eax) : "0" (1) : "ecx","edx");
model = (v.ext_model << 4) + v.model;
if (v.family != 6 || model != 26 || v.stepping > 4)
return;
Expand All @@ -57,7 +59,8 @@ static inline unsigned arch_get_filter(op_cpu cpu_type)
{
if (cpu_type == CPU_ARCH_PERFMON) {
unsigned ebx, eax;
asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : "ecx","edx");
asm volatile( "pushl %%ebx; cpuid; movl %%ebx, %1; popl %%ebx"
: "=a" (eax), "=S" (ebx) : "0" (0xa) : "ecx","edx");
workaround_nehalem_aaj79(&ebx);
return ebx & num_to_mask(eax >> 24);
}
Expand All @@ -68,7 +71,8 @@ static inline int arch_num_counters(op_cpu cpu_type)
{
if (cpu_type == CPU_ARCH_PERFMON) {
unsigned v;
asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx");
asm volatile( "pushl %%ebx; cpuid; movl %%eax, %1; popl %%ebx"
: "=a" (v) : "0" (0xa) : "ecx","edx");
return (v >> 8) & 0xff;
}
return -1;
Expand All @@ -77,7 +81,8 @@ static inline int arch_num_counters(op_cpu cpu_type)
static inline unsigned arch_get_counter_mask(void)
{
unsigned v;
asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx");
asm volatile( "pushl %%ebx; cpuid; movl %%ebx, %1; popl %%ebx"
: "=a" (v) : "0" (0xa) : "ecx","edx");
return num_to_mask((v >> 8) & 0xff);
}

Expand Down
176 changes: 133 additions & 43 deletions opcontrol/opcontrol.cpp
Expand Up @@ -30,24 +30,24 @@

#include "op_config.h"

#if 0
#define verbose(fmt...) printf(fmt)
#else
#define verbose(fmt...)
#endif
#define verbose(fmt...) if (verbose_print) printf(fmt)

/* Experiments found that using a small interval may hang the device, and the
* more events tracked simultaneously, the longer the interval has to be.
*/

#if !defined(WITH_ARM_V7_A)
#if defined(__i386__) || defined(__x86_64__)
#define MAX_EVENTS 2
int min_count[MAX_EVENTS] = {60000, 100000};
#elif !defined(WITH_ARM_V7_A)
#define MAX_EVENTS 3
int min_count[MAX_EVENTS] = {150000, 200000, 250000};
#else
#define MAX_EVENTS 5
int min_count[MAX_EVENTS] = {150000, 200000, 250000, 300000, 350000};
#endif

int verbose_print;
int list_events;
int show_usage;
int setup;
Expand All @@ -60,6 +60,7 @@ int reset;
int selected_events[MAX_EVENTS];
int selected_counts[MAX_EVENTS];

char callgraph[8];
char kernel_range[512];
char vmlinux[512];

Expand All @@ -69,84 +70,133 @@ struct option long_options[] = {
{"reset", 0, &reset, 1},
{"setup", 0, &setup, 1},
{"quick", 0, &quick, 1},
{"callgraph", 1, 0, 'c'},
{"event", 1, 0, 'e'},
{"vmlinux", 1, 0, 'v'},
{"kernel-range", 1, 0, 'r'},
{"start", 0, &start, 1},
{"stop", 0, &stop, 1},
{"dump", 0, 0, 'd'},
{"shutdown", 0, 0, 'h'},
{"status", 0, 0, 't'},
{"verbose", 0, 0, 'V'},
{0, 0, 0, 0},
};

struct event_info {
int id;
int um;
const char *name;
const char *explanation;
} event_info[] = {
#if !defined(WITH_ARM_V7_A)
#if defined(__i386__) || defined(__x86_64__)
/* INTEL_ARCH_PERFMON events */

/* 0x3c counters:cpuid um:zero minimum:6000 filter:0 name:CPU_CLK_UNHALTED :
* Clock cycles when not halted
*/
{0x3c, 0, "CPU_CLK_UNHALTED",
"Clock cycles when not halted" },

/* event:0x3c counters:cpuid um:one minimum:6000 filter:2 name:UNHALTED_REFERENCE_CYCLES :
* Unhalted reference cycles
*/
{0x3c, 1, "UNHALTED_REFERENCE_CYCLES",
"Unhalted reference cycles" },

/* event:0xc0 counters:cpuid um:zero minimum:6000 filter:1 name:INST_RETIRED :
* number of instructions retired
*/
{0xc0, 0, "INST_RETIRED",
"number of instructions retired"},

/* event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES :
* Last level cache demand requests from this core that missed the LLC
*/
{0x2e, 0x41, "LLC_MISSES",
"Last level cache demand requests from this core that missed the LLC"},

/* event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS :
* Last level cache demand requests from this core
*/
{0x2e, 0x4f, "LLC_REFS",
"Last level cache demand requests from this core"},

/* event:0xc4 counters:cpuid um:zero minimum:500 filter:6 name:BR_INST_RETIRED :
* number of branch instructions retired
*/
{0xc4, 0, "BR_INST_RETIRED",
"number of branch instructions retired"},

/* event:0xc5 counters:cpuid um:zero minimum:500 filter:7 name:BR_MISS_PRED_RETIRED :
* number of mispredicted branches retired (precise)
*/
{0xc5, 0, "BR_MISS_PRED_RETIRED",
"number of mispredicted branches retired (precise)"},

#elif !defined(WITH_ARM_V7_A)
/* ARM V6 events */
{0x00, "IFU_IFETCH_MISS",
{0x00, 0, "IFU_IFETCH_MISS",
"number of instruction fetch misses"},
{0x01, "CYCLES_IFU_MEM_STALL",
{0x01, 0, "CYCLES_IFU_MEM_STALL",
"cycles instruction fetch pipe is stalled"},
{0x02, "CYCLES_DATA_STALL",
{0x02, 0, "CYCLES_DATA_STALL",
"cycles stall occurs for due to data dependency"},
{0x03, "ITLB_MISS",
{0x03, 0, "ITLB_MISS",
"number of Instruction MicroTLB misses"},
{0x04, "DTLB_MISS",
{0x04, 0, "DTLB_MISS",
"number of Data MicroTLB misses"},
{0x05, "BR_INST_EXECUTED",
{0x05, 0, "BR_INST_EXECUTED",
"branch instruction executed w/ or w/o program flow change"},
{0x06, "BR_INST_MISS_PRED",
{0x06, 0, "BR_INST_MISS_PRED",
"branch mispredicted"},
{0x07, "INSN_EXECUTED",
{0x07, 0, "INSN_EXECUTED",
"instructions executed"},
{0x09, "DCACHE_ACCESS",
{0x09, 0, "DCACHE_ACCESS",
"data cache access, cacheable locations"},
{0x0a, "DCACHE_ACCESS_ALL",
{0x0a, 0, "DCACHE_ACCESS_ALL",
"data cache access, all locations"},
{0x0b, "DCACHE_MISS",
{0x0b, 0, "DCACHE_MISS",
"data cache miss"},
{0x0c, "DCACHE_WB",
{0x0c, 0, "DCACHE_WB",
"data cache writeback, 1 event for every half cacheline"},
{0x0d, "PC_CHANGE",
{0x0d, 0, "PC_CHANGE",
"number of times the program counter was changed without a mode switch"},
{0x0f, "TLB_MISS",
{0x0f, 0, "TLB_MISS",
"Main TLB miss"},
{0x10, "EXP_EXTERNAL",
{0x10, 0, "EXP_EXTERNAL",
"Explicit external data access"},
{0x11, "LSU_STALL",
{0x11, 0, "LSU_STALL",
"cycles stalled because Load Store request queue is full"},
{0x12, "WRITE_DRAIN",
{0x12, 0, "WRITE_DRAIN",
"Times write buffer was drained"},
{0xff, "CPU_CYCLES",
{0xff, 0, "CPU_CYCLES",
"clock cycles counter"},
#else
/* ARM V7 events */
{0x00, "PMNC_SW_INCR",
{0x00, 0, "PMNC_SW_INCR",
"Software increment of PMNC registers"},
{0x01, "IFETCH_MISS",
{0x01, 0, "IFETCH_MISS",
"Instruction fetch misses from cache or normal cacheable memory"},
{0x02, "ITLB_MISS",
{0x02, 0, "ITLB_MISS",
"Instruction fetch misses from TLB"},
{0x03, "DCACHE_REFILL",
{0x03, 0, "DCACHE_REFILL",
"Data R/W operation that causes a refill from cache or normal cacheable"
"memory"},
{0x04, "DCACHE_ACCESS",
{0x04, 0, "DCACHE_ACCESS",
"Data R/W from cache"},
{0x05, "DTLB_REFILL",
{0x05, 0, "DTLB_REFILL",
"Data R/W that causes a TLB refill"},
{0x06, "DREAD",
{0x06, 0, "DREAD",
"Data read architecturally executed (note: architecturally executed = for"
"instructions that are unconditional or that pass the condition code)"},
{0x07, "DWRITE",
{0x07, 0, "DWRITE",
"Data write architecturally executed"},
{0x08, "INSTR_EXECUTED",
{0x08, 0, "INSTR_EXECUTED",
"All executed instructions"},
{0x09, "EXC_TAKEN",
{0x09, 0, "EXC_TAKEN",
"Exception taken"},
{0x0A, "EXC_EXECUTED",
{0x0A, 0, "EXC_EXECUTED",
"Exception return architecturally executed"},
{0x0B, "CID_WRITE",
"Instruction that writes to the Context ID Register architecturally"
Expand Down Expand Up @@ -238,13 +288,19 @@ void usage()
printf("\nopcontrol: usage:\n"
" --list-events list event types\n"
" --help this message\n"
" --verbose show extra status\n"
" --setup setup directories\n"
#if defined(__i386__) || defined(__x86_64__)
" --quick setup and select CPU_CLK_UNHALTED:60000\n"
#else
" --quick setup and select CPU_CYCLES:150000\n"
#endif
" --status show configuration\n"
" --start start data collection\n"
" --stop stop data collection\n"
" --reset clears out data from current session\n"
" --shutdown kill the oprofile daeman\n"
" --callgraph=depth callgraph depth\n"
" --event=eventspec\n"
" Choose an event. May be specified multiple times.\n"
" eventspec is in the form of name[:count], where :\n"
Expand Down Expand Up @@ -459,8 +515,9 @@ void do_status()
printf(" %9u samples received\n", num);
num = read_num(OP_DRIVER_BASE"/stats/cpu0/sample_lost_overflow");
printf(" %9u samples lost overflow\n", num);
#if 0
/* FIXME - backtrace seems broken */

#if defined(__i386__) || defined(__x86_64__)
/* FIXME on ARM - backtrace seems broken there */
num = read_num(OP_DRIVER_BASE"/stats/cpu0/backtrace_aborted");
printf(" %9u backtrace aborted\n", num);
num = read_num(OP_DRIVER_BASE"/backtrace_depth");
Expand Down Expand Up @@ -495,13 +552,17 @@ int main(int argc, char * const argv[])
strcpy(kernel_range, "");

while (1) {
int c = getopt_long(argc, argv, "", long_options, &option_index);
int c = getopt_long(argc, argv, "c:e:v:r:dhVt", long_options, &option_index);
if (c == -1) {
break;
}
switch (c) {
case 0:
break;
/* --callgraph */
case 'c':
strncpy(callgraph, optarg, sizeof(callgraph));
break;
/* --event */
case 'e':
if (num_events == MAX_EVENTS) {
Expand All @@ -521,15 +582,33 @@ int main(int argc, char * const argv[])
case 'r':
sprintf(kernel_range, "-r %s", optarg);
break;
case 'd':
/* --dump */ {
int pid = read_num(OP_DATA_DIR"/lock");
echo_dev("1", 0, "dump", -1);
if (pid >= 0) {
sleep(1);
kill(pid, SIGHUP);
}
break;
}
/* --shutdown */
case 'h': {
int pid = read_num(OP_DATA_DIR"/lock");
if (pid >= 0) {
kill(pid, SIGHUP); /* Politely ask the daemon to close files */
sleep(1);
kill(pid, SIGTERM);/* Politely ask the daemon to die */
sleep(1);
kill(pid, SIGKILL);
}
setup_session_dir();
break;
}
/* --verbose */
case 'V':
verbose_print++;
break;
/* --status */
case 't':
do_status();
Expand All @@ -547,7 +626,11 @@ int main(int argc, char * const argv[])
}

if (quick) {
#if defined(__i386__) || defined(__x86_64__)
process_event("CPU_CLK_UNHALTED");
#else
process_event("CPU_CYCLES");
#endif
setup = 1;
}

Expand All @@ -566,12 +649,18 @@ int main(int argc, char * const argv[])
}
}

if (strlen(callgraph)) {
echo_dev(callgraph, 0, "backtrace_depth", -1);
}

if (num_events != 0) {
int i;

strcpy(command, "oprofiled --session-dir="OP_DATA_DIR);

#if !defined(WITH_ARM_V7_A)
#if defined(__i386__) || defined(__x86_64__)
/* Nothing */
#elif !defined(WITH_ARM_V7_A)
/* Since counter #3 can only handle CPU_CYCLES, check and shuffle the
* order a bit so that the maximal number of events can be profiled
* simultaneously
Expand Down Expand Up @@ -624,15 +713,16 @@ int main(int argc, char * const argv[])
* --events=CYCLES_DATA_STALL:2:0:200000:0:1:1,....
*/
snprintf(command+strlen(command), 1024 - strlen(command),
"%s:%d:%d:%d:0:1:1",
"%s:%d:%d:%d:%d:1:1",
event_info[event_idx].name,
event_info[event_idx].id,
i,
selected_counts[i]);
selected_counts[i],
event_info[event_idx].um);

setup_result |= echo_dev("1", 0, "user", i);
setup_result |= echo_dev("1", 0, "kernel", i);
setup_result |= echo_dev("0", 0, "unit_mask", i);
setup_result |= echo_dev(NULL, event_info[event_idx].um, "unit_mask", i);
setup_result |= echo_dev("1", 0, "enabled", i);
setup_result |= echo_dev(NULL, selected_counts[i], "count", i);
setup_result |= echo_dev(NULL, event_info[event_idx].id,
Expand Down

0 comments on commit 751a443

Please sign in to comment.