# Executable and Linkable Format (ELF)
- https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
- common format for executable files, object code, shared libraries, and core dumps
<img src="./resources/ELF.png">
- an ELF file has two views: the program header table describes the segments used at run time, whereas the section header table lists the sections of the binary used at link time.
- let's compile the following program and examine ELF format using various tools

```c
// demo-programs/hello.c program
#include <stdio.h>
int main() {
    puts("Hello World!");
    return 0;
}
```

In [37]:
%%bash
in=./demo-programs/hello.c
out=hello

gcc -g -o $out $in
./$out

Hello World!


In [2]:
! ls -al hello

-rwxr-xr-x 1 root root 17812 Mar 11 13:16 hello


In [3]:
! cat hello

ELF              `  4   @      4    ( # "    4   4   4   `  `           �  �  �                             �  �                    \  \                       �  �           �.  �>  �>  (  ,           �.  �>  �>  �   �            �  �  �  D   D         P�td         L   L         Q�td                          R�td�.  �>  �>            /lib/ld-linux.so.2           GNU y�є���I��=]�\�?�         GNU                                      �K��                V                          "                 r               .              �                           libc.so.6 _IO_stdin_used puts __cxa_finalize __libc_start_main GLIBC_2.0 GLIBC_2.1.3 _ITM_deregisterTMCloneTable __gmon_start__ _ITM_registerTMCloneTable                          ii   @      si	   J       �>     �>     �?     @     �?    �?    �?    �?    @    @                                                      

## file utility
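- identifies the type of any file; for ELF files it also displays some information such as the class (32/64-bit), architecture, and how the binary is linked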

In [5]:
! file ./demo-programs/hello.c

./demo-programs/hello.c: C source, ASCII text


In [9]:
! hexdump -C hello

00000000  7f 45 4c 46 01 01 01 00  00 00 00 00 00 00 00 00  |.ELF............|
00000010  03 00 03 00 01 00 00 00  60 10 00 00 34 00 00 00  |........`...4...|
00000020  1c 40 00 00 00 00 00 00  34 00 20 00 0b 00 28 00  |.@......4. ...(.|
00000030  23 00 22 00 06 00 00 00  34 00 00 00 34 00 00 00  |#.".....4...4...|
00000040  34 00 00 00 60 01 00 00  60 01 00 00 04 00 00 00  |4...`...`.......|
00000050  04 00 00 00 03 00 00 00  94 01 00 00 94 01 00 00  |................|
00000060  94 01 00 00 13 00 00 00  13 00 00 00 04 00 00 00  |................|
00000070  01 00 00 00 01 00 00 00  00 00 00 00 00 00 00 00  |................|
00000080  00 00 00 00 b8 03 00 00  b8 03 00 00 04 00 00 00  |................|
00000090  00 10 00 00 01 00 00 00  00 10 00 00 00 10 00 00  |................|
000000a0  00 10 00 00 5c 02 00 00  5c 02 00 00 05 00 00 00  |....\...\.......|
000000b0  00 10 00 00 01 00 00 00  00 20 00 00 00 20 00 00  |......... ... ..|
000000c0  00 20 00 00 a0 01 00 00  a0 01

### let's compile memory_segments.c file and use it to examine various sections

In [30]:
%%bash
in=./demo-programs/memory_segments.c
out=memory_segments.exe

gcc -g -o $out $in
./$out

output from main function
main is at address: 0x440257
main is at address: 0x440257
function is at address: 0x4401b9
global_initialized_var is at address 0x443024
static_initialized_var is at address 0x443038

static_var is at address 0x443040
global_var is at address 0x443044

heap_var is at address 0x23b41a0

stack_var is at address 0xbfa5a908

calling function()...
Output inside function:
static_initialized_var = 15
static_initialized_var is at address 0x443034
stack_var = 100
stack_var is at address 0xbfa5a8ec

calling function() again...
Output inside function:
static_initialized_var = 25
static_initialized_var is at address 0x443034
stack_var = 100
stack_var is at address 0xbfa5a8ec


In [31]:
# see the memory_segments.c source file
! cat ./demo-programs/memory_segments.c

#include <stdio.h>
#include <stdlib.h>

int global_var;
int global_initialized_var = 5;
char global_string[] = "John Smith!";

void function() {
   int stack_var = 100; 
   // notice this variable has the same name as the one in main()
   static int static_initialized_var = 5;
   static_initialized_var += 10;
   printf("%s\n", "Output inside function:");
   printf("static_initialized_var = %d\n", static_initialized_var);
   printf("static_initialized_var is at address %p\n", &static_initialized_var);
   printf("stack_var = %d\n", stack_var);
   printf("stack_var is at address %p\n", &stack_var);
}


int main() {
   int stack_var; // same name as the variable in function()
   static int static_var;
   int * heap_var_ptr;
   static int static_initialized_var = 5;

   heap_var_ptr = (int *) malloc(4);
   printf("%s\n", "output from main function");
   // these addressse are in the code/text segment
   printf("main is at address: %p\n", main);
   printf("main 

## ELF file parts

## Symbols
- names of functions and variables, e.g., if the library function printf is used, how does the program find it? It is looked up by its symbol name.

## Sections
- symbols are organized into **sections** - code lives in one section (.text) and data lives in another (.data, .rodata)

## Segments
- sections are organized into **segments**

## readelf and objdump
- these utilities can help us look at various parts

### look at all the symbols of a binary
- important symbols: main, _start, puts

In [20]:
! readelf --symbols memory_segments.exe


Symbol table '.dynsym' contains 10 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND 
     1: 00000000     0 NOTYPE  WEAK   DEFAULT  UND _ITM_deregisterTMCloneTab
     2: 00000000     0 FUNC    GLOBAL DEFAULT  UND printf@GLIBC_2.0 (2)
     3: 00000000     0 FUNC    WEAK   DEFAULT  UND __cxa_finalize@GLIBC_2.1.3 (3)
     4: 00000000     0 FUNC    GLOBAL DEFAULT  UND malloc@GLIBC_2.0 (2)
     5: 00000000     0 FUNC    GLOBAL DEFAULT  UND puts@GLIBC_2.0 (2)
     6: 00000000     0 NOTYPE  WEAK   DEFAULT  UND __gmon_start__
     7: 00000000     0 FUNC    GLOBAL DEFAULT  UND __libc_start_main@GLIBC_2.0 (2)
     8: 00000000     0 NOTYPE  WEAK   DEFAULT  UND _ITM_registerTMCloneTable
     9: 00002004     4 OBJECT  GLOBAL DEFAULT   16 _IO_stdin_used

Symbol table '.symtab' contains 81 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND 
     1: 0000

### display all the sections
- some important sections .text, .rodata, .data, .bss

In [32]:
! readelf --sections memory_segments.exe

There are 35 section headers, starting at offset 0x4310:

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .interp           PROGBITS        00000194 000194 000013 00   A  0   0  1
  [ 2] .note.gnu.build-i NOTE            000001a8 0001a8 000024 00   A  0   0  4
  [ 3] .note.ABI-tag     NOTE            000001cc 0001cc 000020 00   A  0   0  4
  [ 4] .gnu.hash         GNU_HASH        000001ec 0001ec 000020 04   A  5   0  4
  [ 5] .dynsym           DYNSYM          0000020c 00020c 0000a0 10   A  6   1  4
  [ 6] .dynstr           STRTAB          000002ac 0002ac 0000a9 00   A  0   0  1
  [ 7] .gnu.version      VERSYM          00000356 000356 000014 02   A  5   0  2
  [ 8] .gnu.version_r    VERNEED         0000036c 00036c 000030 00   A  6   1  4
  [ 9] .rel.dyn          REL             0000039c 00039c 000040 08   A  5   0  4
  [10] .rel.plt     

## look at just one section, e.g., .rodata 
- read-only data such as string literals is stored in .rodata, e.g., "Hello World!" in hello, or the printf format strings of memory_segments.exe shown below

In [33]:
! readelf -x .rodata memory_segments.exe


Hex dump of section '.rodata':
  0x00002000 03000000 01000200 4f757470 75742069 ........Output i
  0x00002010 6e736964 65206675 6e637469 6f6e3a00 nside function:.
  0x00002020 73746174 69635f69 6e697469 616c697a static_initializ
  0x00002030 65645f76 6172203d 2025640a 00000000 ed_var = %d.....
  0x00002040 73746174 69635f69 6e697469 616c697a static_initializ
  0x00002050 65645f76 61722069 73206174 20616464 ed_var is at add
  0x00002060 72657373 2025700a 00737461 636b5f76 ress %p..stack_v
  0x00002070 6172203d 2025640a 00737461 636b5f76 ar = %d..stack_v
  0x00002080 61722069 73206174 20616464 72657373 ar is at address
  0x00002090 2025700a 006f7574 70757420 66726f6d  %p..output from
  0x000020a0 206d6169 6e206675 6e637469 6f6e006d  main function.m
  0x000020b0 61696e20 69732061 74206164 64726573 ain is at addres
  0x000020c0 733a2025 700a0066 756e6374 696f6e20 s: %p..function 
  0x000020d0 69732061 74206164 64726573 733a2025 is at address: %
  0x000020e0 700a0000 676c6f

### the objdump program can also be used to examine each of a program's sections

In [34]:
! objdump -s -j .rodata memory_segments.exe


memory_segments.exe:     file format elf32-i386

Contents of section .rodata:
 2000 03000000 01000200 4f757470 75742069  ........Output i
 2010 6e736964 65206675 6e637469 6f6e3a00  nside function:.
 2020 73746174 69635f69 6e697469 616c697a  static_initializ
 2030 65645f76 6172203d 2025640a 00000000  ed_var = %d.....
 2040 73746174 69635f69 6e697469 616c697a  static_initializ
 2050 65645f76 61722069 73206174 20616464  ed_var is at add
 2060 72657373 2025700a 00737461 636b5f76  ress %p..stack_v
 2070 6172203d 2025640a 00737461 636b5f76  ar = %d..stack_v
 2080 61722069 73206174 20616464 72657373  ar is at address
 2090 2025700a 006f7574 70757420 66726f6d   %p..output from
 20a0 206d6169 6e206675 6e637469 6f6e006d   main function.m
 20b0 61696e20 69732061 74206164 64726573  ain is at addres
 20c0 733a2025 700a0066 756e6374 696f6e20  s: %p..function 
 20d0 69732061 74206164 64726573 733a2025  is at address: %
 20e0 700a0000 676c6f62 616c5f69 6e697469  p...global_initi
 2

In [35]:
! readelf -x .data memory_segments.exe
# global_initialized_var = 5 shows up in the hex column (05000000) but not in the ASCII column, because byte 0x05 is not a printable character


Hex dump of section '.data':
  0x0000401c 00000000 20400000 05000000 4a6f686e .... @......John
  0x0000402c 20536d69 74682100 05000000 05000000  Smith!.........



In [27]:
! readelf -x .bss memory_segments.exe

Section '.bss' has no data to dump.


### look at the segments
- GNU_STACK is important to note
    - RW - Read and Write; NO Execute
    - data in stack will be treated as literal values or just data but not code!

In [36]:
! readelf --segments hello


Elf file type is DYN (Shared object file)
Entry point 0x1080
There are 11 program headers, starting at offset 52

Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  PHDR           0x000034 0x00000034 0x00000034 0x00160 0x00160 R   0x4
  INTERP         0x000194 0x00000194 0x00000194 0x00013 0x00013 R   0x1
      [Requesting program interpreter: /lib/ld-linux.so.2]
  LOAD           0x000000 0x00000000 0x00000000 0x003fc 0x003fc R   0x1000
  LOAD           0x001000 0x00001000 0x00001000 0x0042c 0x0042c R E 0x1000
  LOAD           0x002000 0x00002000 0x00002000 0x00364 0x00364 R   0x1000
  LOAD           0x002ef4 0x00003ef4 0x00003ef4 0x0013c 0x00148 RW  0x1000
  DYNAMIC        0x002efc 0x00003efc 0x00003efc 0x000f0 0x000f0 RW  0x4
  NOTE           0x0001a8 0x000001a8 0x000001a8 0x00044 0x00044 R   0x4
  GNU_EH_FRAME   0x0021c8 0x000021c8 0x000021c8 0x0004c 0x0004c R   0x4
  GNU_STACK      0x000000 0x00000000 0x00000000 0x00000 0x

## Disassemble using objdump
- look at assembly of the whole binary
- by default objdump shows AT&T assembly syntax, where % prefixes registers and $ prefixes immediate values
    - source before the destination
    - e.g., `mov $5, %eax`
- https://en.wikipedia.org/wiki/X86_assembly_language

In [38]:
! objdump -d hello


hello:     file format elf32-i386


Disassembly of section .init:

00001000 <_init>:
    1000:	53                   	push   %ebx
    1001:	83 ec 08             	sub    $0x8,%esp
    1004:	e8 97 00 00 00       	call   10a0 <__x86.get_pc_thunk.bx>
    1009:	81 c3 f7 2f 00 00    	add    $0x2ff7,%ebx
    100f:	8b 83 f4 ff ff ff    	mov    -0xc(%ebx),%eax
    1015:	85 c0                	test   %eax,%eax
    1017:	74 02                	je     101b <_init+0x1b>
    1019:	ff d0                	call   *%eax
    101b:	83 c4 08             	add    $0x8,%esp
    101e:	5b                   	pop    %ebx
    101f:	c3                   	ret    

Disassembly of section .plt:

00001020 <.plt>:
    1020:	ff b3 04 00 00 00    	pushl  0x4(%ebx)
    1026:	ff a3 08 00 00 00    	jmp    *0x8(%ebx)
    102c:	00 00                	add    %al,(%eax)
	...

00001030 <puts@plt>:
    1030:	ff a3 0c 00 00 00    	jmp    *0xc(%ebx)
    1036:	68 00 00 00 00       	push   $0x0
    103b:	e9 e

In [39]:
! objdump -D hello | grep -A20 main.:

00001199 <main>:
    1199:	8d 4c 24 04          	lea    0x4(%esp),%ecx
    119d:	83 e4 f0             	and    $0xfffffff0,%esp
    11a0:	ff 71 fc             	pushl  -0x4(%ecx)
    11a3:	55                   	push   %ebp
    11a4:	89 e5                	mov    %esp,%ebp
    11a6:	53                   	push   %ebx
    11a7:	51                   	push   %ecx
    11a8:	e8 28 00 00 00       	call   11d5 <__x86.get_pc_thunk.ax>
    11ad:	05 53 2e 00 00       	add    $0x2e53,%eax
    11b2:	83 ec 0c             	sub    $0xc,%esp
    11b5:	8d 90 08 e0 ff ff    	lea    -0x1ff8(%eax),%edx
    11bb:	52                   	push   %edx
    11bc:	89 c3                	mov    %eax,%ebx
    11be:	e8 6d fe ff ff       	call   1030 <puts@plt>
    11c3:	83 c4 10             	add    $0x10,%esp
    11c6:	b8 00 00 00 00       	mov    $0x0,%eax
    11cb:	8d 65 f8             	lea    -0x8(%ebp),%esp
    11ce:	59                   	pop    %ecx
    11cf:	5b                   	pop    %ebx
    1

### disassemble in Intel syntax
- much cleaner
- destination before source 
    - e.g., `mov eax, 5`

In [41]:
! objdump -M intel -D hello


hello:     file format elf32-i386


Disassembly of section .interp:

00000194 <.interp>:
 194:	2f                   	das    
 195:	6c                   	ins    BYTE PTR es:[edi],dx
 196:	69 62 2f 6c 64 2d 6c 	imul   esp,DWORD PTR [edx+0x2f],0x6c2d646c
 19d:	69 6e 75 78 2e 73 6f 	imul   ebp,DWORD PTR [esi+0x75],0x6f732e78
 1a4:	2e 32 00             	xor    al,BYTE PTR cs:[eax]

Disassembly of section .note.gnu.build-id:

000001a8 <.note.gnu.build-id>:
 1a8:	04 00                	add    al,0x0
 1aa:	00 00                	add    BYTE PTR [eax],al
 1ac:	14 00                	adc    al,0x0
 1ae:	00 00                	add    BYTE PTR [eax],al
 1b0:	03 00                	add    eax,DWORD PTR [eax]
 1b2:	00 00                	add    BYTE PTR [eax],al
 1b4:	47                   	inc    edi
 1b5:	4e                   	dec    esi
 1b6:	55                   	push   ebp
 1b7:	00 79 ff             	add    BYTE PTR [ecx-0x1],bh
 1ba:	d1 94 05 ea 96 16 8c 	rcl    DWORD PTR [

In [40]:
! objdump -M intel -D hello | grep -A20 main.:

00001199 <main>:
    1199:	8d 4c 24 04          	lea    ecx,[esp+0x4]
    119d:	83 e4 f0             	and    esp,0xfffffff0
    11a0:	ff 71 fc             	push   DWORD PTR [ecx-0x4]
    11a3:	55                   	push   ebp
    11a4:	89 e5                	mov    ebp,esp
    11a6:	53                   	push   ebx
    11a7:	51                   	push   ecx
    11a8:	e8 28 00 00 00       	call   11d5 <__x86.get_pc_thunk.ax>
    11ad:	05 53 2e 00 00       	add    eax,0x2e53
    11b2:	83 ec 0c             	sub    esp,0xc
    11b5:	8d 90 08 e0 ff ff    	lea    edx,[eax-0x1ff8]
    11bb:	52                   	push   edx
    11bc:	89 c3                	mov    ebx,eax
    11be:	e8 6d fe ff ff       	call   1030 <puts@plt>
    11c3:	83 c4 10             	add    esp,0x10
    11c6:	b8 00 00 00 00       	mov    eax,0x0
    11cb:	8d 65 f8             	lea    esp,[ebp-0x8]
    11ce:	59                   	pop    ecx
    11cf:	5b                   	pop    ebx
    11d0:	5d         