In [17]:
import angr

# Demo: quick angr pipeline
**Goal:** show key angr features end-to-end:
1. load binary & project  
2. build CFG (fast)  
3. traverse CFG -> per-instruction disassembly (mnemonic + operands)  
4. inspect VEX IR for a block  
5. tiny symbolic execution: set symbolic input & find target


In [18]:
# the path for the binary to be analyzed
BINARY_PATH = "../data/bound_vuln_stripped" 

## 1) Load an angr project & some quick checks:

In [19]:
proj_stripped = angr.Project(BINARY_PATH, auto_load_libs=False)
print("Arch:", proj_stripped.arch)
print("Entry:", hex(proj_stripped.entry))
print("Base:", hex(proj_stripped.loader.main_object.mapped_base))
print("Sections:", [ (s.name, hex(s.min_addr), hex(s.max_addr)) for s in proj_stripped.loader.main_object.sections ])
syms_stripped = proj_stripped.loader.main_object.symbols

Arch: <Arch AMD64 (LE)>
Entry: 0x4011c0
Base: 0x400000
Sections: [('', '0x400000', '0x3fffff'), ('.interp', '0x400318', '0x400333'), ('.note.gnu.property', '0x400338', '0x400367'), ('.note.gnu.build-id', '0x400368', '0x40038b'), ('.note.ABI-tag', '0x40038c', '0x4003ab'), ('.gnu.hash', '0x4003b0', '0x4003d7'), ('.dynsym', '0x4003d8', '0x400527'), ('.dynstr', '0x400528', '0x40060a'), ('.gnu.version', '0x40060c', '0x400627'), ('.gnu.version_r', '0x400628', '0x400677'), ('.rela.dyn', '0x400678', '0x40074f'), ('.rela.plt', '0x400750', '0x4007f7'), ('.init', '0x401000', '0x40101a'), ('.plt', '0x401020', '0x40109f'), ('.plt.got', '0x4010a0', '0x4010af'), ('.plt.sec', '0x4010b0', '0x40111f'), ('.text', '0x401120', '0x40131b'), ('.fini', '0x40131c', '0x401328'), ('.rodata', '0x402000', '0x40201f'), ('.eh_frame_hdr', '0x402020', '0x40205b'), ('.eh_frame', '0x402060', '0x402133'), ('.init_array', '0x403d88', '0x403d8f'), ('.fini_array', '0x403d90', '0x403d97'), ('.dynamic', '0x403d98', '0x403f87'

In [20]:
BINARY_PATH = "../data/bound_vuln"
proj = angr.Project(BINARY_PATH, auto_load_libs=False)
section_syms = [s.name for s in proj_stripped.loader.main_object.sections]
section = [s.name for s in proj.loader.main_object.sections]
print(set(section) - set(section_syms))
main_symbol = proj.loader.main_object.get_symbol("main")
print (hex(main_symbol.rebased_addr))
main_symbol = proj.loader.main_object.symbols_by_addr[int('0x401120', 16)]
print(main_symbol.name)



{'.strtab', '.symtab'}
0x401120
main


## 2) Build a CFG (fast)

In [21]:
cfg = proj.analyses.CFGFast(normalize=True)
funcs = list(cfg.kb.functions.items())
for addr, func in funcs:
    print(f"  {func.name:40} -> {hex(addr)}")
if "main" in cfg.kb.functions:
    main_addr = cfg.kb.functions["main"].addr
    print(hex(main_addr))

  _init                                    -> 0x401000
  sub_401020                               -> 0x401020
  sub_40102d                               -> 0x40102d
  sub_401030                               -> 0x401030
  sub_40103f                               -> 0x40103f
  sub_401040                               -> 0x401040
  sub_40104f                               -> 0x40104f
  sub_401050                               -> 0x401050
  sub_40105f                               -> 0x40105f
  sub_401060                               -> 0x401060
  sub_40106f                               -> 0x40106f
  sub_401070                               -> 0x401070
  sub_40107f                               -> 0x40107f
  sub_401080                               -> 0x401080
  sub_40108f                               -> 0x40108f
  sub_401090                               -> 0x401090
  sub_40109f                               -> 0x40109f
  __cxa_finalize                           -> 0x4010a0
  puts    

## 3) Traverse the CFG: iterate blocks in `main`, extract disasm, opcode & operands
**For each block**: we get capstone disassembly if available (`block.capstone`) and per-insn fields:
- `insn.mnemonic` and `insn.op_str`  
- raw bytes: `insn.bytes`

In [None]:
for func_addr, func in cfg.functions.items():
    if func.name.startswith('sub_') or func.name in ['UnresolvableCallTarget', 'UnresolvableJumpTarget']:
        continue
    print(func.name)
    if(func.name!="main"):
        continue
    for block in func.blocks:
        block_addr = block.addr
        print(block_addr)
        #disassemble instructions from block
        disassembly = block.capstone.insns
        records = []
        #build record of instructions for binary file
        for insn in disassembly:
            records.append({
                "addr": hex(insn.address),
                "mnemonic": insn.mnemonic,
                "op_str": insn.op_str,
                "bytes": insn.bytes.hex()
            })
        print(records)

_init
__cxa_finalize
puts
strlen
__stack_chk_fail
strcspn
fgets
strcmp
__memcpy_chk
main
4198688
[{'addr': '0x401120', 'mnemonic': 'endbr64', 'op_str': '', 'bytes': 'f30f1efa'}, {'addr': '0x401124', 'mnemonic': 'push', 'op_str': 'rbp', 'bytes': '55'}, {'addr': '0x401125', 'mnemonic': 'lea', 'op_str': 'rdi, [rip + 0xedf]', 'bytes': '488d3ddf0e0000'}, {'addr': '0x40112c', 'mnemonic': 'sub', 'op_str': 'rsp, 0x90', 'bytes': '4881ec90000000'}, {'addr': '0x401133', 'mnemonic': 'mov', 'op_str': 'rax, qword ptr fs:[0x28]', 'bytes': '64488b042528000000'}, {'addr': '0x40113c', 'mnemonic': 'mov', 'op_str': 'qword ptr [rsp + 0x88], rax', 'bytes': '4889842488000000'}, {'addr': '0x401144', 'mnemonic': 'xor', 'op_str': 'eax, eax', 'bytes': '31c0'}, {'addr': '0x401146', 'mnemonic': 'mov', 'op_str': 'rbp, rsp', 'bytes': '4889e5'}, {'addr': '0x401149', 'mnemonic': 'call', 'op_str': '0x4010b0', 'bytes': 'e862ffffff'}]
4198734
[{'addr': '0x40114e', 'mnemonic': 'mov', 'op_str': 'rdx, qword ptr [rip + 0x2eb

## 4) Inspect VEX IR for a block
angr exposes PyVEX `IRSBB` via `block.vex` (or `block.vex.pp()` text)

In [23]:
main_function = cfg.functions[main_addr]
for block in main_function.blocks:
    print("VEX IR (short):")
    print(block.vex.pp())

    # Or iterate VEX statements
    stmts = block.vex.statements
    print("\nStatements count:", len(stmts))
    for s in stmts:
        print(" -", s)  
    break

VEX IR (short):
IRSB {
   t0:Ity_I64 t1:Ity_I64 t2:Ity_I64 t3:Ity_I64 t4:Ity_I64 t5:Ity_I64 t6:Ity_I64 t7:Ity_I64 t8:Ity_I32 t9:Ity_I32 t10:Ity_I32 t11:Ity_I64 t12:Ity_I64 t13:Ity_I64 t14:Ity_I64 t15:Ity_I64 t16:Ity_I64 t17:Ity_I64 t18:Ity_I64 t19:Ity_I64 t20:Ity_I64 t21:Ity_I64 t22:Ity_I32 t23:Ity_I64 t24:Ity_I32 t25:Ity_I64 t26:Ity_I64 t27:Ity_I64 t28:Ity_I64 t29:Ity_I64 t30:Ity_I64 t31:Ity_I64 t32:Ity_I64

   00 | ------ IMark(0x401120, 4, 0) ------
   01 | PUT(rip) = 0x0000000000401124
   02 | ------ IMark(0x401124, 1, 0) ------
   03 | t0 = GET:I64(rbp)
   04 | t14 = GET:I64(rsp)
   05 | t13 = Sub64(t14,0x0000000000000008)
   06 | PUT(rsp) = t13
   07 | STle(t13) = t0
   08 | ------ IMark(0x401125, 7, 0) ------
   09 | PUT(rdi) = 0x000000000040200b
   10 | ------ IMark(0x40112c, 7, 0) ------
   11 | t3 = Sub64(t13,0x0000000000000090)
   12 | PUT(rsp) = t3
   13 | PUT(rip) = 0x0000000000401133
   14 | ------ IMark(0x401133, 9, 0) ------
   15 | t16 = GET:I64(fs)
   16 | t15 = Add64

## 5) Simple data-flow: Symbolic Execution to trace the value in specific registers


In [24]:
# define the hook for fgets
class MyFgets(angr.SimProcedure):
    def run(self, s, size, stream):
        data = b"secret\x00"
        bv = claripy.BVV(data)
        self.state.memory.store(s, bv)
        return s

# # Hook commonly used libc symbols
proj.hook_symbol("puts", angr.SIM_PROCEDURES['libc']['puts']())
proj.hook_symbol("strlen", angr.SIM_PROCEDURES['libc']['strlen']())
proj.hook_symbol("strcmp", angr.SIM_PROCEDURES['libc']['strcmp']())
# proj.hook_symbol("fgets", angr.SIM_PROCEDURES['libc']['fgets']())
proj.hook_symbol("fgets", MyFgets())
proj.hook_symbol("__memcpy_chk", angr.SIM_PROCEDURES['libc']['memcpy']())

# // bound_vuln.c
# #include <stdio.h>
# #include <string.h>

# int check(const char* in){
#     char buf[16];
#     if (strlen(in) <= 32) {
#         strcpy(buf, in);
#         return strcmp(buf, "secret");
#     }
#     return -1;
# }
# int main(){
#     char input[128];
#     puts("Enter token:");
#     fgets(input, sizeof(input), stdin);
#     input[strcspn(input, "\n")] = 0;
#     if (check(input)==0) puts("OK");
#     else puts("NO");
#     return 0;
# }

func = next(cfg.functions.get_by_name("main"))

# the entry for symbolic execution
state = proj.factory.blank_state(addr=func.addr)

# define rdi as a symbol
import claripy
rdi_sym = claripy.BVS("rdi_sym", 64)
state.regs.rdi = rdi_sym

# create the symbolic execution manager
simgr = proj.factory.simgr(state)

# execute step by step
for step in range(200):
    if len(simgr.active) == 0:
        print("No more active states.")
        break
    
    state = simgr.active[0]
    print("RDI =", state.solver.eval_upto(state.regs.rdi, 1))
    print("RDI (AST) =", state.regs.rdi)
    print(f"\n--- Step {step}, PC = {hex(state.addr)} ---")
    if((state.addr)==int("0x4011ab", 16)):
        print("hell no")
        break
    if((state.addr)==int("0x401181", 16)):
        print("Oh yes")
        break
    try:
        block = proj.factory.block(state.addr)
        print(block.disassembly)
    except:
        pass

    # one step further
    simgr.step()




RDI = [0]
RDI (AST) = <BV64 rdi_sym_27_64>

--- Step 0, PC = 0x401120 ---
0x401120:	endbr64	
0x401124:	push	rbp
0x401125:	lea	rdi, [rip + 0xedf]
0x40112c:	sub	rsp, 0x90
0x401133:	mov	rax, qword ptr fs:[0x28]
0x40113c:	mov	qword ptr [rsp + 0x88], rax
0x401144:	xor	eax, eax
0x401146:	mov	rbp, rsp
0x401149:	call	0x4010b0
RDI = [4202507]
RDI (AST) = <BV64 0x40200b>

--- Step 1, PC = 0x4010b0 ---
0x4010b0:	endbr64	
0x4010b4:	bnd jmp	qword ptr [rip + 0x2ee5]
RDI = [4202507]
RDI (AST) = <BV64 0x40200b>

--- Step 2, PC = 0x500010 ---
0x500010:	add	byte ptr [rax], al
0x500012:	add	byte ptr [rax], al
0x500014:	add	byte ptr [rax], al
0x500016:	add	byte ptr [rax], al
RDI = [4202507]
RDI (AST) = <BV64 0x40200b>

--- Step 3, PC = 0x40114e ---
0x40114e:	mov	rdx, qword ptr [rip + 0x2ebb]
0x401155:	mov	esi, 0x80
0x40115a:	mov	rdi, rbp
0x40115d:	call	0x4010f0
RDI = [576460752303357800]
RDI (AST) = <BV64 0x7fffffffffeff68>

--- Step 4, PC = 0x4010f0 ---
0x4010f0:	endbr64	
0x4010f4:	bnd jmp	qword ptr [rip



RDI = [576460752303357800]
RDI (AST) = <BV64 0x7fffffffffeff68>

--- Step 10, PC = 0x4012b0 ---
0x4012b0:	endbr64	
0x4012b4:	push	rbp
0x4012b5:	mov	rbp, rdi
0x4012b8:	sub	rsp, 0x20
0x4012bc:	mov	rax, qword ptr fs:[0x28]
0x4012c5:	mov	qword ptr [rsp + 0x18], rax
0x4012ca:	xor	eax, eax
0x4012cc:	call	0x4010c0
RDI = [576460752303357800]
RDI (AST) = <BV64 0x7fffffffffeff68>

--- Step 11, PC = 0x4010c0 ---
0x4010c0:	endbr64	
0x4010c4:	bnd jmp	qword ptr [rip + 0x2edd]
RDI = [576460752303357800]
RDI (AST) = <BV64 0x7fffffffffeff68>

--- Step 12, PC = 0x500018 ---
0x500018:	add	byte ptr [rax], al
0x50001a:	add	byte ptr [rax], al
0x50001c:	add	byte ptr [rax], al
0x50001e:	add	byte ptr [rax], al
RDI = [576460752303357800]
RDI (AST) = <BV64 0x7fffffffffeff68>

--- Step 13, PC = 0x4012d1 ---
0x4012d1:	cmp	rax, 0x20
0x4012d5:	ja	0x401310
RDI = [576460752303357800]
RDI (AST) = <BV64 0x7fffffffffeff68>

--- Step 14, PC = 0x4012d7 ---
0x4012d7:	mov	rdi, rsp
0x4012da:	mov	rsi, rbp
0x4012dd:	lea	rdx, [r