In the last lesson we encountered the Bitcoin protocol's [Version Handshake](https://en.bitcoin.it/wiki/Version_Handshake). We saw how Bitcoin network peers won't respond if you don't start the conversion with a `version` message.

But we cheated last lesson. I gave you a serialize `version` message and didn't tell you how I created it. And 

We were also lazy: we didn't parse the cryptic `payload` of the `version` message that our peer gave us.

We, too, we rude! After listening for our peer's `version` message we stopped listening and never received their `verack` message.

So you see, we have much to fix in this lesson!

To begin, I'm going to redefine everything from last lesson. I'm going to rename `Message` -> `Packet`.

In [11]:
from hashlib import sha256

NETWORK_MAGIC = 0xD9B4BEF9

def bytes_to_int(b):
    return int.from_bytes(b, 'little')

def read_magic(sock):
    magic_bytes = sock.recv(4)
    magic = bytes_to_int(magic_bytes)
    return magic

def read_command(sock):
    raw = sock.recv(12)
    # remove empty bytes
    command = raw.replace(b"\x00", b"")
    return command

def read_length(sock):
    raw = sock.recv(4)
    length = bytes_to_int(raw)
    return length

def read_checksum(sock):
    # FIXME: protocol documentation says this should be an integer ...
    raw = sock.recv(4)
    return raw

def calculate_checksum(payload_bytes):
    """First 4 bytes of sha256(sha256(payload))"""
    first_round = sha256(payload_bytes).digest()
    second_round = sha256(first_round).digest()
    first_four_bytes = second_round[:4]
    return first_four_bytes

def read_payload(sock, length):
    payload = sock.recv(length)
    return payload


class Packet:

    def __init__(self, command, payload):
        self.command = command
        self.payload = payload

    @classmethod
    def from_socket(cls, sock):
        magic = read_magic(sock)
        if magic != NETWORK_MAGIC:
            raise ValueError(f'Network magic "{magic}" is wrong')

        command = read_command(sock)
        payload_length = read_length(sock)
        checksum = read_checksum(sock)
        payload = read_payload(sock, payload_length)
        
        calculated_checksum = calculate_checksum(payload)
        if calculated_checksum != checksum:
            raise RuntimeError("Checksums don't match")

        if payload_length != len(payload):
            raise RuntimeError("Tried to read {payload_length} bytes, only received {len(payload)} bytes")

        return cls(command, payload)

    def to_bytes(self):
        pass

    def to_message(self):
        message_class = command_to_message_class(self.command)
        return message_class.from_payload(self.payload)

    def __repr__(self):
        return f"<Message command={self.command} payload={self.payload}>"

In [12]:
import socket

PEER_IP = "35.187.200.6"
PEER_PORT = 8333

# magic "version" bytestring
VERSION = b'\xf9\xbe\xb4\xd9version\x00\x00\x00\x00\x00j\x00\x00\x00\x9b"\x8b\x9e\x7f\x11\x01\x00\x0f\x04\x00\x00\x00\x00\x00\x00\x93AU[\x00\x00\x00\x00\x0f\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00rV\xc5C\x9b:\xea\x89\x14/some-cool-software/\x01\x00\x00\x00\x01'

sock = socket.socket()j
sock.connect((PEER_IP, PEER_PORT))

# initiate the "version handshake"
sock.send(VERSION)

# receive their "version" response
version_message = Packet.from_socket(sock)

print(version_message.payload)

SyntaxError: invalid syntax (<ipython-input-12-c73b8611158e>, line 9)

Our next task is to parse this payload. I'll let you in on a secret: parsing the payload will work almost exactly the same as the `Packet.from_socket` method above. 

You will need to look interpret [this chart](https://en.bitcoin.it/wiki/Protocol_documentation#version)

![image](./images/version-message.png)

Here's an exercise: parse the version field

In [13]:
def read_version(binary_stream):
    ### your code here ###
    # read and interpret bytes from the stream
    bytes_ = binary_stream.read(4)
    int_ = bytes_to_int(bytes_)
    return int_

In [14]:
import ipytest, pytest
import test_data

# ipytest.clean_tests("test_read_version*")

version_streams = test_data.make_version_streams()

def test_read_version_0():
    n = read_version(version_streams[0])
    assert n == 70015

def test_read_version_1():
    n = read_version(version_streams[1])
    assert n == 60001

def test_read_version_2():
    n = read_version(version_streams[2])
    assert n == 106
    
ipytest.run_tests(doctest=True)
ipytest.clean_tests("test_read_version*")

unittest.case.FunctionTestCase (test_read_version_0) ... ok
unittest.case.FunctionTestCase (test_read_version_1) ... ok
unittest.case.FunctionTestCase (test_read_version_2) ... ok

----------------------------------------------------------------------
Ran 3 tests in 0.005s

OK



Now, here's a further question: Given a version message binary stream, tell me whether the node that sent it can send a `pong` message ([hint](https://bitcoin.org/en/developer-reference#protocol-versions)).

In [15]:
def can_send_pong(binary_stream):
    ### your code here ###
    return read_version(binary_stream) >= 60001

In [16]:
version_streams = test_data.make_version_streams()

def test_can_send_pong_0():
    result = can_send_pong(version_streams[0])
    assert result == True

def test_can_send_pong_1():
    result = can_send_pong(version_streams[1])
    assert result == True

def test_can_send_pong_2():
    result = can_send_pong(version_streams[2])
    assert result == False
    
ipytest.run_tests(doctest=True)
ipytest.clean_tests("test_can_send_pong*")

unittest.case.FunctionTestCase (test_can_send_pong_0) ... ok
unittest.case.FunctionTestCase (test_can_send_pong_1) ... ok
unittest.case.FunctionTestCase (test_can_send_pong_2) ... ok

----------------------------------------------------------------------
Ran 3 tests in 0.014s

OK


Now we've got the hang of it.

Here's the outline of a `VersionMessage` class.

In [17]:
class VersionMessage:

    command = b"version"

    def __init__(self, version, services, timestamp, addr_recv, addr_from, 
                 nonce, user_agent, start_height, relay):
        self.version = version
        self.services = services
        self.timestamp = timestamp
        self.addr_recv = addr_recv
        self.addr_from = addr_from
        self.nonce = nonce
        self.user_agent = user_agent
        self.start_height = start_height
        self.relay = relay

    @classmethod
    def from_bytes(cls, payload):
        stream = io.BytesIO(payload)
        
        version = read_int(stream, 4)
        services = read_int(stream, 8)
        timestamp = read_int(stream, 8)
        addr_recv = read_address(stream)
        addr_from = read_addreas(stream)
        nonce = read_int(stream, 8)
        user_agent = read_varstr(stream)
        start_height = read_int(stream, 4)
        relay = read_bool(stream, 1)
        
        return cls(version, services, timestamp, addr_recv, addr_from, 
                   nonce, user_agent, start_height, relay)
    


If you consult the protocol documentation you will see that the `from_bytes` classmethod just translates the `Description` and `Data Type` columns into python code. 

Here we encounter some "types" that we're familiar with -- `int32_t` / `uint64_t` / `int64_t` -- which are different types in a low-level language like C++, but are all equivalent to the general `int` in Python. Our previously implemented `bytes_to_int` can handle these just fine.

But we also encounter some new types: `net_addr`, `varstr`, and `bool`.

`bool` is the simplest: it's just a Boolean, either `True` or `False` in Python. We'll learn how to read this first.

# "Boolean" fields

Please fill out this `read_bool` function.

In [18]:
def read_bool(stream):
    bytes_ = stream.read(1)
    if len(bytes_) != 1:
        raise RuntimeError("Stream ran dry")
    integer =  bytes_to_int(bytes_)
    boolean = bool(integer)
    return boolean

In [19]:
import test_data

def test_read_bool_0():
    stream = test_data.make_stream(test_data.true_bytes)
    result = read_bool(stream)
    assert type(result) == bool
    assert result is True
    
def test_read_bool_1():
    stream = test_data.make_stream(test_data.false_bytes)
    result = read_bool(stream)
    assert type(result) == bool
    assert result is False

Once you get your `read_bool` function to pass these tests by successfully reading `True` and `False` values, I want you to implement one more thing.

I want you to raise a RuntimeError if `stream.read(n)` doesn't return a byte string of length `n`. This is just a check to make sure that our program is running correctly.

In [20]:
def test_read_bool_2():
    stream = test_data.make_stream(b"")
    with pytest.raises(RuntimeError) as e_info:
        result = read_bool(stream)

ipytest.run_tests(doctest=True)
ipytest.clean_tests("test_read_bool_*")

unittest.case.FunctionTestCase (test_read_bool_0) ... ok
unittest.case.FunctionTestCase (test_read_bool_1) ... ok
unittest.case.FunctionTestCase (test_read_bool_2) ... ok

----------------------------------------------------------------------
Ran 3 tests in 0.004s

OK


# "Variable Length" fields

Next comes [`var_str`], the type of the "User Agent", which is basically an advertisement of the Bitcoin software implementation that the node is using. You can see a listing of popular values [here](https://bitnodes.earn.com/nodes/).

["Variable Length Strings"](https://en.bitcoin.it/wiki/Protocol_documentation#Variable_length_string) are used for values that string values of undependable length. This technique is able to encode strings containing up to 9 bytes of information, but only will only take up 1 byte of information if the string is only 1 byte.

How does it work?

It's very simple. It's a [`var_int`](https://en.bitcoin.it/wiki/Protocol_documentation#Variable_length_integer) (variable length integer) followed followed by n string bytes -- where n is the integer value of the `var_int` field.

How does `var_int` work?

The first byte of a `var_int` is a marker which says how many bytes come after it:
* `0xFF`: 8 byte integer follows
* `0xFE`: 4 byte integer follows
* `0xFD`: 2 byte integer follows
* < `0xFD`: interpret first bit as a 1 byte integer

So let's first implement `var_int`, since `var_str` depends on it.

In [21]:
def read_var_int(stream):
    i = stream.read(1)[0]
    if i == 0xfd:
        return bytes_to_int(stream.read(2))
    elif i == 0xfe:
        return bytes_to_int(stream.read(4))
    elif i == 0xff:
        return bytes_to_int(stream.read(8))
    else:
        return i

In [30]:
import ipytest, pytest
import test_data as td

enumerated = (
    (td.eight_byte_int, td.eight_byte_var_int),
    (td.four_byte_int, td.four_byte_var_int),
    (td.two_byte_int, td.two_byte_var_int),
    (td.one_byte_int, td.one_byte_var_int),
)

def test_read_var_int():
    for correct_int, var_int in enumerated:
        stream = td.make_stream(var_int)
        calculated_int = read_var_int(stream)
        assert correct_int == calculated_int

ipytest.run_tests(doctest=True)
ipytest.clean_tests("test_read_var_int*")

unittest.case.FunctionTestCase (test_read_var_int) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.003s

OK


[`net_addr`](https://en.bitcoin.it/wiki/Protocol_documentation#Network_address) is the most complicated, so we'll handle it last.

Now that we have that out of the way, let's implement `read_var_str`:

In [23]:
def read_var_str(stream):
    length = read_var_int(stream)
    string = stream.read(length)
    return string

In [32]:
import ipytest, pytest
import test_data as td

enumerated = (
    (td.short_str, td.short_var_str),
    (td.long_str, td.long_var_str),
)

def test_read_var_str():
    for correct_byte_str, var_str in enumerated:
        stream = td.make_stream(var_str)
        calculated_byte_str = read_var_str(stream)
        assert correct_byte_str == calculated_byte_str

ipytest.run_tests(doctest=True)
ipytest.clean_tests("test_read_var_str*")

unittest.case.FunctionTestCase (test_read_var_str) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.001s

OK


In [4]:
class BaseMessage:

    def to_packet(self):
        return Packet(self.command, self.to_bytes())


class VersionMessage(BaseMessage):

    command = b"version"

    def __init__(self):
        pass

    @classmethod
    def from_payload(cls, payload):
        stream = io.BytesIO(payload)
        ...
        return cls(p1, p2, p3)

    def to_bytes(self):
        pass    