# bep_0003 decode

https://www.bittorrent.org/beps/bep_0003.html

* Strings are length-prefixed base ten followed by a colon and the string. For example 4:spam corresponds to 'spam'.
* Integers are represented by an 'i' followed by the number in base 10 followed by an 'e'. For example i3e corresponds to 3 and i-3e corresponds to -3. Integers have no size limitation. i-0e is invalid. All encodings with a leading zero, such as i03e, are invalid, other than i0e, which of course corresponds to 0.
* Lists are encoded as an 'l' followed by their elements (also bencoded) followed by an 'e'. For example l4:spam4:eggse corresponds to ['spam', 'eggs'].
* Dictionaries are encoded as a 'd' followed by a list of alternating keys and their corresponding values followed by an 'e'. For example, d3:cow3:moo4:spam4:eggse corresponds to {'cow': 'moo', 'spam': 'eggs'} and d4:spaml1:a1:bee corresponds to {'spam': ['a', 'b']}. Keys must be strings and appear in sorted order (sorted as raw strings, not alphanumerics).

In [53]:
def decode(byte_string, remainder=None):
    
    byte_string = bytearray(byte_string)
    char = chr(byte_string[0])

    if char == "d":
        content, remainder = _read_dict(byte_string[1:])

    elif char == "l":
        content, remainder = _read_list(byte_string[1:])

    elif char == "i":
        content, remainder = _read_int(byte_string[1:], "e")

    else:
        content, remainder = _read_string(byte_string)
        
    if remainder:
        return content, remainder
    else:
        return content

    
def _read_dict(byte_string):
    dict_ = {}
    
    while chr(byte_string[0]) != "e":
        key, remainder = decode(byte_string)
        value, remainder = decode(remainder)
        dict_[key.decode()] = value
        byte_string = remainder
        
    return dict_, byte_string[1:]


def _read_list(byte_string):
    list_ = []
           
    while chr(byte_string[0]) != "e":
        string, remainder = decode(byte_string)
        list_.append(string)
        byte_string = remainder
        
    return list_, byte_string[1:]


def _read_int(byte_string, end):
    digits = []
    
    byte = byte_string.pop(0)
    while chr(byte) != end:
        digits.append(str(chr(byte)))
        byte = byte_string.pop(0)
  
    return int(''.join(digits)), byte_string

    
def _read_string(byte_string):
    num, rest = _read_int(byte_string, ":")
    return bytearray([byte_string.pop(0) for _ in range(num)]), byte_string


def test_decode(f):
    f(decode(b"ll4:spam4:eggsel4:spam4:eggsee") == [[bytearray(b'spam'), bytearray(b'eggs')], [bytearray(b'spam'), bytearray(b'eggs')]])
    f(decode(b"d3:cow3:moo4:spam4:eggse") == {'cow': bytearray(b'moo'), 'spam': bytearray(b'eggs')})
    f(decode(b"d4:spaml1:a1:bee") == {'spam': [bytearray(b'a'), bytearray(b'b')]})
    f(decode(b"li-3ei0ed3:cowi5400e4:spam4:eggsee") == [-3, 0, {'cow': 5400, 'spam': bytearray(b'eggs')}])

In [54]:
test_decode(print)
%timeit test_decode(lambda x: x)

True
True
True
True
102 µs ± 952 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [66]:
def decode(byte_string):
    return _decode(bytearray(byte_string))[0]


def _decode(byte_string, remainder=None):
    
    byte = byte_string[0]

    if byte == ord("i"):
        del byte_string[0]
        return _read_int(byte_string, ord("e"))
    elif byte == ord("l"):
        del byte_string[0]
        return _read_list(byte_string)
    elif byte == ord("d"):
        del byte_string[0]
        return _read_dict(byte_string)
    else:
        return _read_string(byte_string)
        
    
def _read_dict(byte_string):
    dict_ = {}
    
    while byte_string[0] != ord("e"):
        key, remainder = _decode(byte_string)
        value, remainder = _decode(remainder)
        dict_[key.decode()] = value
        byte_string = remainder
        
    return dict_, byte_string[1:]


def _read_list(byte_string):
    list_ = []
           
    while byte_string[0] != ord("e"):
        string, remainder = _decode(byte_string)
        list_.append(string)
        byte_string = remainder
        
    return list_, byte_string[1:]


def _read_int(byte_string, end):
    digits = []
    
    byte = byte_string.pop(0)
    while byte != end:
        digits.append(str(chr(byte)))
        byte = byte_string.pop(0)
  
    return int(''.join(digits)), byte_string

    
def _read_string(byte_string):
    num, rest = _read_int(byte_string, ord(":"))
    return bytearray([byte_string.pop(0) for _ in range(num)]), byte_string


def test_decode(f):
    f(decode(b"ll4:spam4:eggsel4:spam4:eggsee") == [[bytearray(b'spam'), bytearray(b'eggs')], [bytearray(b'spam'), bytearray(b'eggs')]])
    f(decode(b"d3:cow3:moo4:spam4:eggse") == {'cow': bytearray(b'moo'), 'spam': bytearray(b'eggs')})
    f(decode(b"d4:spaml1:a1:bee") == {'spam': [bytearray(b'a'), bytearray(b'b')]})
    f(decode(b"li-3ei0ed3:cowi5400e4:spam4:eggsee") == [-3, 0, {'cow': 5400, 'spam': bytearray(b'eggs')}])

In [67]:
test_decode(print)
%timeit test_decode(lambda x: x)

True
True
True
True
89.8 µs ± 904 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [68]:
import socket
import sys


sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
server_address = ("67.215.246.10", 6881)

message = 'd1:ad2:id20:abcdefghij01234567896:target20:mnopqrstuvwxyz123456e1:q9:find_node1:t2:aa1:y1:qe'
# message = 'd1:ad2:id20:abcdefghij0123456789e1:q4:ping1:t2:aa1:y1:qe'
sent = sock.sendto(message.encode(), server_address)
data, server = sock.recvfrom(8000)
data

b'd2:ip6:\xbc\xd6\x0b\x1a\x1b\xdc1:rd2:id20:2\xf5NisQ\xffJ\xec)\xcd\xba\xab\xf2\xfb\xe3F|\xc2g5:nodes416:\xf8\xfd_I\x92\xa1\x85}[\xeez\xb9F)\x05IW6ur\xc9]F\x0e\x89d\xd6\xf0\x98\xcd\xf6\xd6%\x9b\xeb\xb7\x02\x91\xe0\xdf\xd7\xb3\xf4!U%=\xc5\xf3\xc2\xaa[\'\x8e1\xee\x86\xa5\xda\xaa\x9e\x85\xe1\xccd\x86t\xb9HP\xb4-\xb8\xa7\xdeo\xf2\x0f\x9a\t*\xd1\x08\xad;\xe3\xf4U,:6*Uv[\xacj\xf9B\xb0\xb6a\xc4\x91\xe2}\xc5\xc5\x15\xd9\x88\x864\xc9\xe7m\x9aW\x02\x11\xe6\xb2\x0c2-\xf7\x813\xe90#\xfe\x14I\xf1\xf1\xbb\xe9\xeb\xb3\xa6\xdb<\x87\x0c>\x99$^R\x8e\xb3\x04Xx\x08\xc8L\xa8\x04\xd5JV\x84s\xea\xc9\xc0\x0fk\x92\nO\xac\xfd\xf1b\xcb2\x1c\xc4\x91\xd4|\xae\x9ap(\xcf\'\x02=\r\xe3\xcfq0\xf72\xb9\xb2\xd5o\xffd&"\xdf\x05\x17P\xd6h\n7\xaf\xd0\xef\xf2i\x17\xd3\x03\xd9\x08\xd1iU\\\xdd\x0bs\x1a\xe1\xf7\xb2\xefQ\x05\xcc;\x92B\xa1I(\xd6\x8f5\xeb\xf4\xdcw\x85I\xc6z\x9f\xc4\x91\t0#\xe4\x1a\xdd\x12\x95LP\x1c\x9b-\xf2\xeep\xb9U\x07^/$\x97\xf1\x1a\xe2\x9ab<\xfa\x84a4V\xc0>x^T\xc9<\xc6\x02\x7f\xdb\xc9F1\xbf\xda\x1a\xe1\x86\xe4

In [71]:
import ipaddress

# ipaddress.ip_address(decode(data)['ip'])
ipaddress.ip_address(int.from_bytes(decode(data)['ip'], byteorder="big"))

IPv6Address('::bcd6:b1a:1bdc')

In [72]:
decode(data)

{'ip': bytearray(b'\xbc\xd6\x0b\x1a\x1b\xdc'),
 'r': {'id': bytearray(b'2\xf5NisQ\xffJ\xec)\xcd\xba\xab\xf2\xfb\xe3F|\xc2g'),
  'nodes': bytearray(b'\xf8\xfd_I\x92\xa1\x85}[\xeez\xb9F)\x05IW6ur\xc9]F\x0e\x89d\xd6\xf0\x98\xcd\xf6\xd6%\x9b\xeb\xb7\x02\x91\xe0\xdf\xd7\xb3\xf4!U%=\xc5\xf3\xc2\xaa[\'\x8e1\xee\x86\xa5\xda\xaa\x9e\x85\xe1\xccd\x86t\xb9HP\xb4-\xb8\xa7\xdeo\xf2\x0f\x9a\t*\xd1\x08\xad;\xe3\xf4U,:6*Uv[\xacj\xf9B\xb0\xb6a\xc4\x91\xe2}\xc5\xc5\x15\xd9\x88\x864\xc9\xe7m\x9aW\x02\x11\xe6\xb2\x0c2-\xf7\x813\xe90#\xfe\x14I\xf1\xf1\xbb\xe9\xeb\xb3\xa6\xdb<\x87\x0c>\x99$^R\x8e\xb3\x04Xx\x08\xc8L\xa8\x04\xd5JV\x84s\xea\xc9\xc0\x0fk\x92\nO\xac\xfd\xf1b\xcb2\x1c\xc4\x91\xd4|\xae\x9ap(\xcf\'\x02=\r\xe3\xcfq0\xf72\xb9\xb2\xd5o\xffd&"\xdf\x05\x17P\xd6h\n7\xaf\xd0\xef\xf2i\x17\xd3\x03\xd9\x08\xd1iU\\\xdd\x0bs\x1a\xe1\xf7\xb2\xefQ\x05\xcc;\x92B\xa1I(\xd6\x8f5\xeb\xf4\xdcw\x85I\xc6z\x9f\xc4\x91\t0#\xe4\x1a\xdd\x12\x95LP\x1c\x9b-\xf2\xeep\xb9U\x07^/$\x97\xf1\x1a\xe2\x9ab<\xfa\x84a4V\xc0>x^T\xc9<\x