Skip to content

Commit

Permalink
memprof: extend symtab with C symbols
Browse files Browse the repository at this point in the history
This commit provides an ability to enrich memprof's symbol table with
information about C symbols. The parser is updated correspondingly.

If there is a .symtab section or at least a .dynsym segment in a shared
library, then memprof dumps its contents into the symtab. The <sym> rule
was extended in the following way:
| sym := sym-lua | sym-cfunc | sym-trace | sym-final
| sym-cfunc := sym-header sym-addr sym-name

If none of those are present, then instead of a symbol name and its
address there will be name and address of a shared library containing
that symbol.

Resolves tarantool/tarantool#5813

Reviewed-by: Sergey Kaplun <skaplun@tarantool.org>
Reviewed-by: Igor Munkin <imun@tarantool.org>
Signed-off-by: Igor Munkin <imun@tarantool.org>
  • Loading branch information
mkokryashkin authored and igormunkin committed Apr 22, 2022
1 parent 0243fb7 commit 88d2600
Show file tree
Hide file tree
Showing 18 changed files with 720 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Makefile.original
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
dis_mips64.lua dis_mips64el.lua vmdef.lua
FILES_UTILSLIB= bufread.lua symtab.lua
FILES_UTILSLIB= avl.lua bufread.lua symtab.lua
FILES_MEMPROFLIB= parse.lua humanize.lua
FILES_TOOLSLIB= memprof.lua
FILE_TMEMPROF= luajit-parse-memprof
Expand Down
5 changes: 5 additions & 0 deletions src/lj_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,11 @@
#define LJ_HASMEMPROF 0
#else
#define LJ_HASMEMPROF 1
#if LJ_TARGET_LINUX
#define LJ_HASRESOLVER 1
#else
#define LJ_HASRESOLVER 0
#endif
#endif

#endif
328 changes: 328 additions & 0 deletions src/lj_memprof.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#define lj_memprof_c
#define LUA_CORE

#define _GNU_SOURCE

#include <errno.h>

#include "lj_arch.h"
Expand All @@ -19,6 +21,14 @@
#include "lj_frame.h"
#include "lj_debug.h"

#if LJ_HASRESOLVER
#include <elf.h>
#include <link.h>
#include <stdio.h>
#include <sys/auxv.h>
#include "lj_gc.h"
#endif

#if LJ_HASJIT
#include "lj_dispatch.h"
#endif
Expand Down Expand Up @@ -71,12 +81,326 @@ static void dump_symtab_proto(struct lj_wbuf *out, const GCproto *pt)
lj_wbuf_addu64(out, (uint64_t)pt->firstline);
}

#if LJ_HASRESOLVER

/*
** Fixed-size header found at the start of a GNU hash table. In memory it
** is followed by the bloom filter words, the bucket array and the chain
** array, in that order.
*/
struct ghashtab_header {
  uint32_t nbuckets;    /* Number of entries in the bucket array. */
  uint32_t symoffset;   /* Symbol index that chain[0] corresponds to. */
  uint32_t bloom_size;  /* Number of bloom filter words. */
  uint32_t bloom_shift; /* Not needed for counting symbols. */
};

/*
** Compute the number of entries in the symbol table described by the GNU
** hash table located at address <ghashtab>.
**
** The table does not store the symbol count, so it is derived: find the
** largest symbol index referenced by any bucket and follow that bucket's
** chain to its terminating entry. The index right after it is the count.
** If no bucket refers to a symbol at or past <symoffset>, then <symoffset>
** itself is returned.
*/
static uint32_t ghashtab_size(ElfW(Addr) ghashtab)
{
  const struct ghashtab_header *hdr = (const struct ghashtab_header *)ghashtab;
  /*
  ** Bloom filter words are as wide as size_t (8 bytes when built for a
  ** 64-bit target, 4 bytes for a 32-bit one), so sizeof(size_t) is used
  ** to skip over the filter.
  */
  const char *bloom = (const char *)ghashtab + sizeof(struct ghashtab_header);
  const uint32_t *bucket =
    (const uint32_t *)(bloom + sizeof(size_t) * hdr->bloom_size);
  const uint32_t *chain = bucket + hdr->nbuckets;
  uint32_t max_sym = 0;
  uint32_t b;

  /* Pick the highest symbol index any bucket starts with. */
  for (b = 0; b < hdr->nbuckets; b++) {
    if (bucket[b] > max_sym)
      max_sym = bucket[b];
  }

  if (max_sym < hdr->symoffset)
    return hdr->symoffset;

  /* A chain entry with the lowest bit set terminates the chain. */
  for (;;) {
    const uint32_t link = chain[max_sym - hdr->symoffset];
    max_sym++;
    if (link & 1)
      return max_sym;
  }
}

/*
** Write every named function symbol of an ELF symbol table into <buf>.
**
** sym     -- array of symbol table entries.
** strtab  -- string table the st_name offsets refer to.
** so_addr -- value added to each st_value before it is written.
** sym_cnt -- number of entries in <sym>.
** buf     -- output buffer.
**
** For each emitted symbol a SYMTAB_CFUNC byte, the 64-bit address and the
** symbol name are appended to <buf>, in that order.
*/
static void write_c_symtab(ElfW(Sym *) sym, char *strtab, ElfW(Addr) so_addr,
                           size_t sym_cnt, struct lj_wbuf *buf)
{
  ElfW(Word) idx;

  /*
  ** Entry 0 of an ELF symbol table is reserved for the undefined symbol,
  ** so the walk starts from entry 1.
  **
  ** For more information, see:
  ** https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-79797.html
  */
  for (idx = 1; idx < sym_cnt; idx++) {
    const ElfW(Sym) *entry = &sym[idx];

    /*
    ** ELF32_ST_TYPE and ELF64_ST_TYPE are defined identically, hence the
    ** 32-bit macro serves both ELF classes.
    **
    ** For more, see https://github.com/torvalds/linux/blob/9137eda53752ef73148e42b0d7640a00f1bc96b1/include/uapi/linux/elf.h#L135
    */
    if (ELF32_ST_TYPE(entry->st_info) != STT_FUNC)
      continue;
    /* Skip symbols without a name. */
    if (entry->st_name == 0)
      continue;

    lj_wbuf_addbyte(buf, SYMTAB_CFUNC);
    lj_wbuf_addu64(buf, entry->st_value + so_addr);
    lj_wbuf_addstring(buf, &strtab[entry->st_name]);
  }
}

/*
** Dump function symbols found in the SHT_SYMTAB section of an ELF file.
**
** elf_name -- path of the ELF file to open.
** buf      -- output buffer the symbols are written to.
** L        -- Lua state used for the temporary allocations.
** so_addr  -- value added to every symbol value (see write_c_symtab()).
**
** Returns 0 if the symbol table was read and dumped, -1 otherwise (file
** can not be opened, is not an ELF, is truncated, has no sections or no
** usable SHT_SYMTAB section, or a read/seek/allocation failed). Nothing is
** written to <buf> in the failure case.
*/
static int dump_sht_symtab(const char *elf_name, struct lj_wbuf *buf,
                           lua_State *L, const ElfW(Addr) so_addr)
{
  int status = 0;

  char *strtab = NULL;
  ElfW(Shdr *) section_headers = NULL;
  ElfW(Sym *) sym = NULL;
  ElfW(Ehdr) elf_header = {0};

  ElfW(Off) sym_off = 0;
  ElfW(Off) strtab_off = 0;

  size_t sym_cnt = 0;
  size_t strtab_size = 0;
  size_t header_index = 0;

  size_t shoff = 0; /* Section headers offset. */
  size_t shnum = 0; /* Section headers number. */
  size_t shentsize = 0; /* Section header entry size. */

  FILE *elf_file = fopen(elf_name, "rb");

  if (elf_file == NULL)
    return -1;

  /*
  ** fread() returns the number of complete items read, not the number of
  ** bytes. Anything less than the requested item count means the data is
  ** incomplete (I/O error or truncated file) and must not be used.
  */
  if (fread(&elf_header, sizeof(elf_header), 1, elf_file) != 1)
    goto error;
  if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0)
    /* Not a valid ELF file. */
    goto error;

  shoff = elf_header.e_shoff;
  shnum = elf_header.e_shnum;
  shentsize = elf_header.e_shentsize;

  if (shoff == 0 || shnum == 0 || shentsize == 0)
    /* No sections in ELF. */
    goto error;

  /*
  ** The section header table is indexed below as an array of ElfW(Shdr),
  ** so an entry size that differs from the native one can not be handled.
  */
  if (shentsize != sizeof(ElfW(Shdr)))
    goto error;

  /* Load the section header table into memory. */
  section_headers = lj_mem_new(L, shnum * shentsize);
  if (section_headers == NULL)
    goto error;

  if (fseek(elf_file, shoff, SEEK_SET) != 0)
    goto error;

  if (fread(section_headers, shentsize, shnum, elf_file) != shnum)
    goto error;

  for (header_index = 0; header_index < shnum; ++header_index) {
    if (section_headers[header_index].sh_type == SHT_SYMTAB) {
      ElfW(Shdr) sym_hdr = section_headers[header_index];
      ElfW(Shdr) strtab_hdr;

      /*
      ** sh_link of a symbol table is the index of its string table
      ** section; it must be inside the loaded header table. The entry
      ** size is used as a divisor and the table is read as an array of
      ** ElfW(Sym), so it has to match the native symbol size (which also
      ** rules out zero).
      */
      if (sym_hdr.sh_link >= shnum ||
          sym_hdr.sh_entsize != sizeof(ElfW(Sym)))
        goto error;

      strtab_hdr = section_headers[sym_hdr.sh_link];

      sym_off = sym_hdr.sh_offset;
      sym_cnt = sym_hdr.sh_size / sym_hdr.sh_entsize;

      strtab_off = strtab_hdr.sh_offset;
      strtab_size = strtab_hdr.sh_size;
      break;
    }
  }

  if (sym_off == 0 || strtab_off == 0 || sym_cnt == 0)
    goto error;

  /* Load symtab into memory. */
  sym = lj_mem_new(L, sym_cnt * sizeof(ElfW(Sym)));
  if (sym == NULL)
    goto error;
  if (fseek(elf_file, sym_off, SEEK_SET) != 0)
    goto error;
  if (fread(sym, sizeof(ElfW(Sym)), sym_cnt, elf_file) != sym_cnt)
    goto error;

  /* Load strtab into memory. */
  strtab = lj_mem_new(L, strtab_size * sizeof(char));
  if (strtab == NULL)
    goto error;
  if (fseek(elf_file, strtab_off, SEEK_SET) != 0)
    goto error;
  if (fread(strtab, sizeof(char), strtab_size, elf_file) != strtab_size)
    goto error;

  write_c_symtab(sym, strtab, so_addr, sym_cnt, buf);

  goto end;

error:
  status = -1;

end:
  /* Release everything acquired above, on both success and failure. */
  if (sym != NULL)
    lj_mem_free(G(L), sym, sym_cnt * sizeof(ElfW(Sym)));
  if (strtab != NULL)
    lj_mem_free(G(L), strtab, strtab_size * sizeof(char));
  if (section_headers != NULL)
    lj_mem_free(G(L), section_headers, shnum * shentsize);

  fclose(elf_file);

  return status;
}

/*
** Dump function symbols reachable through the PT_DYNAMIC segment of the
** loaded object described by <info> into <buf>.
**
** Only the first PT_DYNAMIC program header is examined. Returns 0 if the
** symbols were dumped, and 1 if there is no PT_DYNAMIC segment or if it
** lacks a symbol table, a string table or any hash table.
*/
static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
{
  size_t ph;

  for (ph = 0; ph < info->dlpi_phnum; ph++) {
    ElfW(Dyn *) entry;
    ElfW(Sym *) symbols = NULL;
    ElfW(Word *) sysv_hash = NULL;
    ElfW(Addr) gnu_hash = 0;
    ElfW(Word) nsyms = 0;
    char *names = NULL;

    if (info->dlpi_phdr[ph].p_type != PT_DYNAMIC)
      continue;

    /* Collect the tables of interest from the dynamic section. */
    entry = (ElfW(Dyn) *)(info->dlpi_addr + info->dlpi_phdr[ph].p_vaddr);
    while (entry->d_tag != DT_NULL) {
      if (entry->d_tag == DT_HASH)
        sysv_hash = (ElfW(Word *))entry->d_un.d_ptr;
      else if (entry->d_tag == DT_GNU_HASH)
        gnu_hash = entry->d_un.d_ptr;
      else if (entry->d_tag == DT_STRTAB)
        names = (char *)entry->d_un.d_ptr;
      else if (entry->d_tag == DT_SYMTAB)
        symbols = (ElfW(Sym *))entry->d_un.d_ptr;
      entry++;
    }

    /* Not enough data to resolve symbols. */
    if (names == NULL || symbols == NULL ||
        (sysv_hash == NULL && gnu_hash == 0))
      return 1;

    /*
    ** The dynamic section does not carry the symbol count directly, so
    ** it is taken from a hash table. The GNU hash table is preferred if
    ** present; its symbol count has to be computed. Otherwise the classic
    ** hash table is used, which is laid out as follows:
    **   +-------------------+
    **   |      nbucket      |
    **   +-------------------+
    **   |      nchain       |
    **   +-------------------+
    **   |     bucket[0]     |
    **   |       ...         |
    **   | bucket[nbucket-1] |
    **   +-------------------+
    **   |     chain[0]      |
    **   |       ...         |
    **   |  chain[nchain-1]  |
    **   +-------------------+
    ** Chain entries parallel the symbol table, so nchain (the second
    ** word) equals the number of symbol table entries.
    **
    ** For more, see https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
    */
    if (gnu_hash != 0)
      nsyms = ghashtab_size(gnu_hash);
    else
      nsyms = sysv_hash[1];

    write_c_symtab(symbols, names, info->dlpi_addr, nsyms, buf);
    return 0;
  }

  return 1;
}

/*
** Context passed to resolve_symbolnames() through the opaque data pointer
** of dl_iterate_phdr().
*/
struct symbol_resolver_conf {
/* Output buffer the C symbols are written to. */
struct lj_wbuf *buf;
/* Lua state forwarded to dump_sht_symtab(). */
lua_State *L;
};

/*
** Callback for dl_iterate_phdr(): dump C symbols of one loaded object.
**
** info      -- description of the loaded object.
** info_size -- unused.
** data      -- pointer to a struct symbol_resolver_conf.
**
** Always returns 0.
*/
static int resolve_symbolnames(struct dl_phdr_info *info, size_t info_size,
                               void *data)
{
  struct symbol_resolver_conf *conf = data;

  UNUSED(info_size);

  /* Skip vDSO library. */
  if (info->dlpi_addr == getauxval(AT_SYSINFO_EHDR))
    return 0;

  /*
  ** Sources of symbols are tried in order of preference:
  ** 1. The SHT_SYMTAB section read from the ELF file on disk.
  ** 2. The symbol table referenced by the PT_DYNAMIC segment.
  ** The second one is attempted only if the first one fails.
  */
  if (dump_sht_symtab(info->dlpi_name, conf->buf, conf->L,
                      info->dlpi_addr) != 0 &&
      dump_dyn_symtab(info, conf->buf) != 0) {
    /*
    ** Last resort: dump the name and the address of the object itself, so
    ** its functions are at least attributed to it in memprof output.
    */
    lj_wbuf_addbyte(conf->buf, SYMTAB_CFUNC);
    lj_wbuf_addu64(conf->buf, info->dlpi_addr);
    lj_wbuf_addstring(conf->buf, info->dlpi_name);
  }

  return 0;
}

#endif /* LJ_HASRESOLVER */

static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
{
const GCRef *iter = &g->gc.root;
const GCobj *o;
const size_t ljs_header_len = sizeof(ljs_header) / sizeof(ljs_header[0]);

#if LJ_HASRESOLVER
struct symbol_resolver_conf conf = {
.buf = out,
.L = gco2th(gcref(g->cur_L)),
};
#endif

/* Write prologue. */
lj_wbuf_addn(out, ljs_header, ljs_header_len);

Expand All @@ -99,6 +423,10 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
iter = &o->gch.nextgc;
}

#if LJ_HASRESOLVER
/* Write C symbols. */
dl_iterate_phdr(resolve_symbolnames, &conf);
#endif
lj_wbuf_addbyte(out, SYMTAB_FINAL);
}

Expand Down

0 comments on commit 88d2600

Please sign in to comment.