8
8
#define lj_memprof_c
9
9
#define LUA_CORE
10
10
11
+ #define _GNU_SOURCE
12
+
11
13
#include <errno.h>
12
14
13
15
#include "lj_arch.h"
19
21
#include "lj_frame.h"
20
22
#include "lj_debug.h"
21
23
24
+ #if LJ_HASRESOLVER
25
+ #include <elf.h>
26
+ #include <link.h>
27
+ #include <stdio.h>
28
+ #include <sys/auxv.h>
29
+ #include "lj_gc.h"
30
+ #endif
31
+
22
32
#if LJ_HASJIT
23
33
#include "lj_dispatch.h"
24
34
#endif
@@ -71,12 +81,326 @@ static void dump_symtab_proto(struct lj_wbuf *out, const GCproto *pt)
71
81
lj_wbuf_addu64 (out , (uint64_t )pt -> firstline );
72
82
}
73
83
84
+ #if LJ_HASRESOLVER
85
+
86
/*
** Fixed-size header found at the start of a DT_GNU_HASH table. In memory it
** is followed by the Bloom filter words, then the bucket array, then the
** chain array.
*/
struct ghashtab_header {
  uint32_t nbuckets;    /* Number of entries in the bucket array. */
  uint32_t symoffset;   /* Symbol index that chain[0] corresponds to. */
  uint32_t bloom_size;  /* Number of words in the Bloom filter. */
  uint32_t bloom_shift; /* Bloom filter parameter; not needed for counting. */
};

/*
** Returns the number of symbol table entries described by the GNU hash table
** located at address `ghashtab`.
**
** The GNU hash table does not store the symbol count. It is recovered by
** picking the largest symbol index referenced from any bucket and walking
** that symbol's chain up to its terminating entry.
*/
static uint32_t ghashtab_size(ElfW(Addr) ghashtab)
{
  const struct ghashtab_header *hdr = (const struct ghashtab_header *)ghashtab;
  /*
  ** Bloom filter words are as wide as size_t (4 bytes for a 32-bit build,
  ** 8 bytes for a 64-bit build); the buckets start right after them.
  */
  const char *bucket_base = (const char *)ghashtab + sizeof(*hdr) +
			    sizeof(size_t) * hdr->bloom_size;
  const uint32_t *bucket = (const uint32_t *)bucket_base;
  const uint32_t *bucket_end = bucket + hdr->nbuckets;
  /* The chain array immediately follows the last bucket. */
  const uint32_t *chain = bucket_end;
  uint32_t max_sym = 0;

  /* Find the highest symbol index any bucket starts with. */
  for (; bucket != bucket_end; bucket++)
    max_sym = *bucket > max_sym ? *bucket : max_sym;

  /* No bucket refers to a hashed symbol: only the unhashed ones exist. */
  if (max_sym < hdr->symoffset)
    return hdr->symoffset;

  /* The last entry of a chain has its lowest bit set. */
  while ((chain[max_sym - hdr->symoffset] & 1) == 0)
    max_sym++;

  return max_sym + 1;
}
130
+
131
+ static void write_c_symtab (ElfW (Sym * ) sym , char * strtab , ElfW (Addr ) so_addr ,
132
+ size_t sym_cnt , struct lj_wbuf * buf )
133
+ {
134
+ /*
135
+ ** Index 0 in ELF symtab is used to represent undefined symbols. Hence, we
136
+ ** can just start with index 1.
137
+ **
138
+ ** For more information, see:
139
+ ** https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-79797.html
140
+ */
141
+
142
+ ElfW (Word ) sym_index ;
143
+ for (sym_index = 1 ; sym_index < sym_cnt ; sym_index ++ ) {
144
+ /*
145
+ ** ELF32_ST_TYPE and ELF64_ST_TYPE are the same, so we can use
146
+ ** ELF32_ST_TYPE for both 64-bit and 32-bit ELFs.
147
+ **
148
+ ** For more, see https://github.com/torvalds/linux/blob/9137eda53752ef73148e42b0d7640a00f1bc96b1/include/uapi/linux/elf.h#L135
149
+ */
150
+ if (ELF32_ST_TYPE (sym [sym_index ].st_info ) == STT_FUNC &&
151
+ sym [sym_index ].st_name != 0 ) {
152
+ char * sym_name = & strtab [sym [sym_index ].st_name ];
153
+ lj_wbuf_addbyte (buf , SYMTAB_CFUNC );
154
+ lj_wbuf_addu64 (buf , sym [sym_index ].st_value + so_addr );
155
+ lj_wbuf_addstring (buf , sym_name );
156
+ }
157
+ }
158
+ }
159
+
160
+ static int dump_sht_symtab (const char * elf_name , struct lj_wbuf * buf ,
161
+ lua_State * L , const ElfW (Addr ) so_addr )
162
+ {
163
+ int status = 0 ;
164
+
165
+ char * strtab = NULL ;
166
+ ElfW (Shdr * ) section_headers = NULL ;
167
+ ElfW (Sym * ) sym = NULL ;
168
+ ElfW (Ehdr ) elf_header = {};
169
+
170
+ ElfW (Off ) sym_off = 0 ;
171
+ ElfW (Off ) strtab_off = 0 ;
172
+
173
+ size_t sym_cnt = 0 ;
174
+ size_t strtab_size = 0 ;
175
+ size_t header_index = 0 ;
176
+
177
+ size_t shoff = 0 ; /* Section headers offset. */
178
+ size_t shnum = 0 ; /* Section headers number. */
179
+ size_t shentsize = 0 ; /* Section header entry size. */
180
+
181
+ FILE * elf_file = fopen (elf_name , "rb" );
182
+
183
+ if (elf_file == NULL )
184
+ return -1 ;
185
+
186
+ if (fread (& elf_header , sizeof (elf_header ), 1 , elf_file ) != sizeof (elf_header )
187
+ && ferror (elf_file ) != 0 )
188
+ goto error ;
189
+ if (memcmp (elf_header .e_ident , ELFMAG , SELFMAG ) != 0 )
190
+ /* Not a valid ELF file. */
191
+ goto error ;
192
+
193
+ shoff = elf_header .e_shoff ;
194
+ shnum = elf_header .e_shnum ;
195
+ shentsize = elf_header .e_shentsize ;
196
+
197
+ if (shoff == 0 || shnum == 0 || shentsize == 0 )
198
+ /* No sections in ELF. */
199
+ goto error ;
200
+
201
+ /*
202
+ ** Memory occupied by section headers is unlikely to be more than 160B, but
203
+ ** 32-bit and 64-bit ELF files may have sections of different sizes and some
204
+ ** of the sections may duiplicate, so we need to take that into account.
205
+ */
206
+ section_headers = lj_mem_new (L , shnum * shentsize );
207
+ if (section_headers == NULL )
208
+ goto error ;
209
+
210
+ if (fseek (elf_file , shoff , SEEK_SET ) != 0 )
211
+ goto error ;
212
+
213
+ if (fread (section_headers , shentsize , shnum , elf_file ) != shentsize * shnum
214
+ && ferror (elf_file ) != 0 )
215
+ goto error ;
216
+
217
+ for (header_index = 0 ; header_index < shnum ; ++ header_index ) {
218
+ if (section_headers [header_index ].sh_type == SHT_SYMTAB ) {
219
+ ElfW (Shdr ) sym_hdr = section_headers [header_index ];
220
+ ElfW (Shdr ) strtab_hdr = section_headers [sym_hdr .sh_link ];
221
+ size_t symtab_size = sym_hdr .sh_size ;
222
+
223
+ sym_off = sym_hdr .sh_offset ;
224
+ sym_cnt = symtab_size / sym_hdr .sh_entsize ;
225
+
226
+ strtab_off = strtab_hdr .sh_offset ;
227
+ strtab_size = strtab_hdr .sh_size ;
228
+ break ;
229
+ }
230
+ }
231
+
232
+ if (sym_off == 0 || strtab_off == 0 || sym_cnt == 0 )
233
+ goto error ;
234
+
235
+ /* Load symtab into memory. */
236
+ sym = lj_mem_new (L , sym_cnt * sizeof (ElfW (Sym )));
237
+ if (sym == NULL )
238
+ goto error ;
239
+ if (fseek (elf_file , sym_off , SEEK_SET ) != 0 )
240
+ goto error ;
241
+ if (fread (sym , sizeof (ElfW (Sym )), sym_cnt , elf_file ) !=
242
+ sizeof (ElfW (Sym )) * sym_cnt && ferror (elf_file ) != 0 )
243
+ goto error ;
244
+
245
+
246
+ /* Load strtab into memory. */
247
+ strtab = lj_mem_new (L , strtab_size * sizeof (char ));
248
+ if (strtab == NULL )
249
+ goto error ;
250
+ if (fseek (elf_file , strtab_off , SEEK_SET ) != 0 )
251
+ goto error ;
252
+ if (fread (strtab , sizeof (char ), strtab_size , elf_file ) !=
253
+ sizeof (char ) * strtab_size && ferror (elf_file ) != 0 )
254
+ goto error ;
255
+
256
+ write_c_symtab (sym , strtab , so_addr , sym_cnt , buf );
257
+
258
+ goto end ;
259
+
260
+ error :
261
+ status = -1 ;
262
+
263
+ end :
264
+ if (sym != NULL )
265
+ lj_mem_free (G (L ), sym , sym_cnt * sizeof (ElfW (Sym )));
266
+ if (strtab != NULL )
267
+ lj_mem_free (G (L ), strtab , strtab_size * sizeof (char ));
268
+ if (section_headers != NULL )
269
+ lj_mem_free (G (L ), section_headers , shnum * shentsize );
270
+
271
+ fclose (elf_file );
272
+
273
+ return status ;
274
+ }
275
+
276
/*
** Dumps function symbols of a loaded object using the tables referenced from
** its PT_DYNAMIC segment.
**
** info -- description of the object, as passed to a dl_iterate_phdr()
**         callback.
** buf  -- write buffer receiving the symbol records.
**
** Returns 0 if the symbols were written, and 1 if the object has no
** PT_DYNAMIC segment or that segment lacks a symbol table, a string table or
** any hash table.
*/
static int dump_dyn_symtab(struct dl_phdr_info *info, struct lj_wbuf *buf)
{
  const ElfW(Phdr) *dyn_phdr = NULL;
  ElfW(Dyn *) entry = NULL;
  ElfW(Sym *) symtab = NULL;
  ElfW(Word *) sysv_hash = NULL;
  ElfW(Addr) gnu_hash = 0;
  ElfW(Word) nsyms = 0;
  char *names = NULL;
  size_t i;

  /* Only the first PT_DYNAMIC program header is considered. */
  for (i = 0; i < info->dlpi_phnum; i++) {
    if (info->dlpi_phdr[i].p_type == PT_DYNAMIC) {
      dyn_phdr = &info->dlpi_phdr[i];
      break;
    }
  }
  if (dyn_phdr == NULL)
    return 1;

  /* Collect the tables of interest from the dynamic section. */
  entry = (ElfW(Dyn) *)(info->dlpi_addr + dyn_phdr->p_vaddr);
  for (; entry->d_tag != DT_NULL; entry++) {
    if (entry->d_tag == DT_HASH)
      sysv_hash = (ElfW(Word *))entry->d_un.d_ptr;
    else if (entry->d_tag == DT_GNU_HASH)
      gnu_hash = entry->d_un.d_ptr;
    else if (entry->d_tag == DT_STRTAB)
      names = (char *)entry->d_un.d_ptr;
    else if (entry->d_tag == DT_SYMTAB)
      symtab = (ElfW(Sym *))entry->d_un.d_ptr;
  }

  /* Not enough data to resolve symbols. */
  if (names == NULL || symtab == NULL)
    return 1;
  if (sysv_hash == NULL && gnu_hash == 0)
    return 1;

  /*
  ** The dynamic section does not record the symbol count, so it is derived
  ** from a hash table. The GNU hash table is preferred when present.
  **
  ** The classic (SysV) hash table is an array of words laid out as:
  **   nbucket, nchain, bucket[0..nbucket-1], chain[0..nchain-1]
  ** Chain entries parallel the symbol table: chain values index both the
  ** chain array and the symbol table, so nchain (the word at index 1) equals
  ** the number of symbol table entries.
  **
  ** For more, see https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
  */
  if (gnu_hash != 0)
    nsyms = ghashtab_size(gnu_hash);
  else
    nsyms = sysv_hash[1];

  write_c_symtab(symtab, names, info->dlpi_addr, nsyms, buf);
  return 0;
}
346
+
347
+ struct symbol_resolver_conf {
348
+ struct lj_wbuf * buf ;
349
+ lua_State * L ;
350
+ };
351
+
352
+ static int resolve_symbolnames (struct dl_phdr_info * info , size_t info_size ,
353
+ void * data )
354
+ {
355
+ struct symbol_resolver_conf * conf = data ;
356
+ struct lj_wbuf * buf = conf -> buf ;
357
+ lua_State * L = conf -> L ;
358
+
359
+ UNUSED (info_size );
360
+
361
+ /* Skip vDSO library. */
362
+ if (info -> dlpi_addr == getauxval (AT_SYSINFO_EHDR ))
363
+ return 0 ;
364
+
365
+ /*
366
+ ** Main way: try to open ELF and read SHT_SYMTAB, SHT_STRTAB and SHT_HASH
367
+ ** sections from it.
368
+ */
369
+ if (dump_sht_symtab (info -> dlpi_name , buf , L , info -> dlpi_addr ) == 0 ) {
370
+ /* Empty body. */
371
+ }
372
+ /* First fallback: dump functions only from PT_DYNAMIC segment. */
373
+ else if (dump_dyn_symtab (info , buf ) == 0 ) {
374
+ /* Empty body. */
375
+ }
376
+ /*
377
+ ** Last resort: dump ELF size and address to show .so name for its functions
378
+ ** in memprof output.
379
+ */
380
+ else {
381
+ lj_wbuf_addbyte (buf , SYMTAB_CFUNC );
382
+ lj_wbuf_addu64 (buf , info -> dlpi_addr );
383
+ lj_wbuf_addstring (buf , info -> dlpi_name );
384
+ }
385
+
386
+ return 0 ;
387
+ }
388
+
389
+ #endif /* LJ_HASRESOLVER */
390
+
74
391
static void dump_symtab (struct lj_wbuf * out , const struct global_State * g )
75
392
{
76
393
const GCRef * iter = & g -> gc .root ;
77
394
const GCobj * o ;
78
395
const size_t ljs_header_len = sizeof (ljs_header ) / sizeof (ljs_header [0 ]);
79
396
397
+ #if LJ_HASRESOLVER
398
+ struct symbol_resolver_conf conf = {
399
+ .buf = out ,
400
+ .L = gco2th (gcref (g -> cur_L )),
401
+ };
402
+ #endif
403
+
80
404
/* Write prologue. */
81
405
lj_wbuf_addn (out , ljs_header , ljs_header_len );
82
406
@@ -99,6 +423,10 @@ static void dump_symtab(struct lj_wbuf *out, const struct global_State *g)
99
423
iter = & o -> gch .nextgc ;
100
424
}
101
425
426
+ #if LJ_HASRESOLVER
427
+ /* Write C symbols. */
428
+ dl_iterate_phdr (resolve_symbolnames , & conf );
429
+ #endif
102
430
lj_wbuf_addbyte (out , SYMTAB_FINAL );
103
431
}
104
432
0 commit comments