# IOC extractor using Radare2

This notebook extracts IOCs (indicators of compromise) from binaries using Radare2. IOC matchers can easily be extended by adding an extra matcher. Every matcher result can be run through pipes; currently only MISP is supported.

### Load environment

In [44]:
import os

# Binary opened through r2pipe when the notebook is not running inside Cutter.
# Override it with the R2_BINARY environment variable instead of editing the cell.
BINARY_PATH = os.environ.get("R2_BINARY", "/home/jovyan/radare2/malware/apache")

try:
    # if using jupyter within cutter, use the following. This will use the current active binary.
    import cutter
    # we'll assign cutter to variable r2 to be consistent with r2pipe
    r2 = cutter
except ModuleNotFoundError:
    # cutter is not importable: fall back to r2pipe and open the binary ourselves
    import r2pipe
    r2 = r2pipe.open(BINARY_PATH)

### Start analysis

In [None]:
# Send the 'aaa' command (this notebook's analysis step) to r2; the %time magic reports how long the call took.
%time r2.cmd('aaa')

### Extract strings

In [46]:
# Print the plain-text result of the 'iz' command: the "[Strings]" table shown in the output below.
print(r2.cmd('iz'))

[Strings]
Num Paddr      Vaddr      Len Size Section  Type  String
000 0x0010c350 0x0050c350   5   6 (.rodata) ascii %s:%d
001 0x0010c356 0x0050c356  24  25 (.rodata) ascii SIGHUP received, exiting
002 0x0010c36f 0x0050c36f  25  26 (.rodata) ascii SIGTERM received, exiting
003 0x0010c389 0x0050c389  24  25 (.rodata) ascii SIGINT received, exiting
004 0x0010c3a2 0x0050c3a2  27  28 (.rodata) ascii paused, press 'r' to resume
005 0x0010c3be 0x0050c3be  15  16 (.rodata) ascii \e[01;32mresumed
006 0x0010c3ce 0x0050c3ce  24  25 (.rodata) ascii Ctrl+C received, exiting
007 0x0010c3f0 0x0050c3f0  49  50 (.rodata) ascii \e[01;33mpaused\e[0m, press \e[01;35mr\e[0m to resume
008 0x0010c4b8 0x0050c4b8  35  36 (.rodata) ascii  * POOL #%-7zu%s variant=%s, TLS=%d
009 0x0010c4e0 0x0050c4e0  71  72 (.rodata) ascii \e[1;32m * \e[0m\e[1;37mPOOL #%-7zu\e[0m\e[1;%dm%s\e[0m variant \e[1;37m%s\e[0m
010 0x0010c528 0x0050c528  60  61 (.rodata) ascii \e[1;32m * \e[0m\e[1;37m%-13s\e[0m\e[1;36m%s/%s\e[0m\e[1;37m 

In [None]:
# json is imported here so that this cell also works on a fresh kernel
# (the only other `import json` lives in a later cell).
import json
from pprint import pprint

# 'ij' output is parsed as JSON and pretty-printed in full.
r = json.loads(r2.cmd('ij'))
pprint(r)
# Fall back to an empty dict when the 'bin' entry is missing or null, so the
# lookup prints None instead of raising AttributeError.
print((r.get('bin') or {}).get('arch'))

In [84]:
# install pymisp if not exists
try:
    from pymisp import ExpandedPyMISP
except ModuleNotFoundError as exc:
    print("Could not find module pymisp, installing...")
    !pip install pymisp

###  MISP

The following cell will configure a MISP pipe which will run each extracted IOC through MISP. Uncomment and configure the misp_url and misp_key.

In [None]:
import os

import urllib3

# The URL of the MISP instance to connect to.
# Make sure the radare2-notebook container can reach MISP.
# Taken from the MISP_URL environment variable; when it is unset the value is
# None and no MISP pipe is registered later on. To hard-wire a URL instead,
# uncomment and edit the line below.
misp_url = os.environ.get('MISP_URL')
# misp_url = 'https://172.17.0.3:443'

# The API key can be found in the MISP web interface under
# http://+MISP_URL+/users/view/me -> Authkey
# Prefer the MISP_KEY environment variable over pasting the key here, so the
# secret is not saved inside the notebook.
misp_key = os.environ.get('MISP_KEY')
# misp_key = '<your MISP authkey>'

# Should PyMISP verify the MISP certificate?
# Verification is switched off here, which is not recommended; the urllib3
# warnings are silenced as well, so a reminder is printed instead.
misp_verifycert = False
urllib3.disable_warnings()
print("warning: disabled certificate verification")

### Configure the matchers and pipes

In [None]:
import r2pipe
import json
import struct
import re
import base64
from pprint import pprint, pformat
import urllib.parse

# Patterns are written as raw strings so that regex escapes such as \d and \.
# reach the regex engine untouched and do not trigger Python's
# invalid-escape-sequence warning.

# Dotted quad of 1-3 digit groups with an optional ":port" suffix.
IP_MATCHER = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?:[:]\d+)?)")
# http(s)/ftp/file URLs, or bare hosts starting with "www." / "ftp.".
URL_MATCHER = re.compile(r'(?:(?:https?|ftp|file)://|www\.|ftp\.)[-A-Z0-9+&@#/%=~_|$?!:,.]*[A-Z0-9+&@#/%=~_|$]', re.IGNORECASE)
# E-mail addresses whose final label is 2 to 4 letters long.
EMAIL_MATCHER = re.compile(r'([A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4})', re.IGNORECASE)

def regex_matcher(matcher):
    """Turn a compiled regular expression into a matcher function.

    Args:
        matcher: a compiled pattern object exposing ``findall``.

    Returns:
        A one-argument function that returns ``matcher.findall(st)`` for the
        string ``st`` it is given.
    """
    def find_all(st):
        return matcher.findall(st)

    return find_all

def contains_matcher(s):
    """Build a matcher that reports strings containing the fragment ``s``.

    Args:
        s: the substring to look for.

    Returns:
        A one-argument function that returns ``[st]`` (the whole input string)
        when ``s`` occurs in ``st`` and an empty list otherwise.
    """
    def check(st):
        if s in st:
            return [st]
        return []

    return check
                
# Matchers applied to every extracted string; each one returns a list of hits.
matchers = [
    regex_matcher(IP_MATCHER),
    regex_matcher(URL_MATCHER),
    regex_matcher(EMAIL_MATCHER),
    contains_matcher('\\e['),
    contains_matcher('HTTP'),
]

# Callables that every match is handed to; empty until a pipe is registered.
pipes = []

def misp_pipe():
    """Build a pipe that looks an IOC up in MISP and prints a link per hit.

    Reads the module-level ``misp_url``, ``misp_key`` and ``misp_verifycert``
    settings and creates one PyMISP client that is reused for every lookup.

    Returns:
        A function ``fn(ioc)`` that searches MISP for the value ``ioc``, prints
        the event URL of every result and returns the search results.
    """
    print("Using MISP pipe (url={})".format(misp_url))
    from pymisp import ExpandedPyMISP
    misp = ExpandedPyMISP(misp_url, misp_key, misp_verifycert, debug=False)

    def fn(ioc):
        # search MISP for attributes whose value matches the IOC
        results = misp.search(value=ioc)
        for result in results:
            # str() keeps the concatenation valid whether the id is a str or an int
            event_id = str(result.get('Event').get('id'))
            print (">>> MISP result found: {}".format(urllib.parse.urljoin(misp_url, "/events/view/" + event_id)))
        # hand back this lookup's results (previously the unrelated global `r`)
        return results

    return fn

# misp_url may not exist at all when the MISP configuration was left untouched;
# look it up defensively so an unconfigured notebook simply skips the MISP pipe
# instead of failing with a NameError.
if globals().get('misp_url') is not None:
    pipes.append(misp_pipe())

def print_s(s, r):
    """Print one result row for a string entry and the match found in it.

    Args:
        s: mapping with the keys 'paddr', 'vaddr', 'type', 'length' and
            'section' describing the string.
        r: the matched value, printed as the last column.
    """
    columns = [s.get(key) for key in ('paddr', 'vaddr', 'type', 'length', 'section')]
    row = '0x{:08x} 0x{:08x} {:10} {:4} {:10} {}'.format(*columns, r)
    print(row)

### Start IOC extraction

In [88]:
# Parse the JSON result of the 'izj' command into a list of string entries.
strings = json.loads(r2.cmd('izj'))
for s in strings:
    try:
        # The 'string' field is base64-decoded, then the bytes are turned into
        # text using the entry's 'type' field as the codec name.
        raw = base64.b64decode(s.get('string'))
        st = raw.decode(s.get('type'))

        # Report every hit of every matcher and hand it to each pipe.
        for matcher in matchers:
            for match in matcher(st):
                print_s (s, match)
                for pipe in pipes:
                    pipe(match)
    except (ValueError, LookupError):
        # ValueError: invalid base64 or undecodable bytes; LookupError: unknown
        # codec name. Either way the entry is skipped silently.
        continue

0x0010c3be 0x0050c3be ascii        15 .rodata    \e[01;32mresumed
>>> MISP result found: https://172.17.0.3:443/events/view/2
0x0010c3f0 0x0050c3f0 ascii        49 .rodata    \e[01;33mpaused\e[0m, press \e[01;35mr\e[0m to resume
0x0010c4e0 0x0050c4e0 ascii        71 .rodata    \e[1;32m * \e[0m\e[1;37mPOOL #%-7zu\e[0m\e[1;%dm%s\e[0m variant \e[1;37m%s\e[0m
0x0010c528 0x0050c528 ascii        60 .rodata    \e[1;32m * \e[0m\e[1;37m%-13s\e[0m\e[1;36m%s/%s\e[0m\e[1;37m %s\e[0m
0x0010c568 0x0050c568 ascii        41 .rodata    \e[1;32m * \e[0m\e[1;37m%-13slibuv/%s %s\e[0m
0x0010f8b0 0x0050f8b0 ascii         5 .rodata    \e[0m\n
0x0010f8b6 0x0050f8b6 ascii         7 .rodata    \e[0;31m
0x0010f8be 0x0050f8be ascii         7 .rodata    \e[0;33m
0x0010f8c6 0x0050f8c6 ascii         7 .rodata    \e[1;37m
0x0010f8ce 0x0050f8ce ascii         5 .rodata    \e[90m
0x0011031d 0x0051031d ascii         7 .rodata    \e[1;30m
0x00110388 0x00510388 ascii        61 .rodata    \e[1;37muse pool \e[0m\e[1;36m%s:%d