In [2]:
import base64, os
import multihash as mh
import hashlib as hl  
from dataclasses import dataclass

## Kademlia key implementation taken from https://github.com/libp2p/py-libp2p-xor/blob/master/key/key.py

In [3]:
# bits_in_byte returns a list of bits in a byte, in descending order of significance.
def bits_in_byte(byte):
    """Return the eight low bits of ``byte`` as booleans, most significant first.

    Element 0 of the result corresponds to mask 0x80 and element 7 to mask 0x01.
    """
    # Walk the bit positions from 7 (mask 0x80) down to 0 (mask 0x01).
    return [(byte & (1 << shift)) != 0 for shift in range(7, -1, -1)]


class Key(bytes):
    """Byte string used as a Kademlia key, addressed bit by bit (MSB first).

    Equality is restricted to other ``Key`` instances: comparing a ``Key``
    with plain ``bytes`` is always unequal, even when the contents match.
    """

    def bit_len(self):
        """Return the length of the key in bits."""
        return len(self) * 8

    def bit_at(self, offset):
        """Return the bit (0 or 1) at bit position ``offset``.

        Offset 0 is the most significant bit of the first byte. The result is
        normalised to 0/1 so it can be used directly as an index into a
        two-element branch tuple.

        Raises:
            IndexError: if ``offset`` lies beyond the last byte of the key.
        """
        return (self[offset // 8] >> (7 - offset % 8)) & 1

    def to_float(self):
        """Interpret the key as a binary fraction in [0, 1) and return it as a float."""
        f = 0.0
        s = 1.0
        for byte in self:
            for bit in bits_in_byte(byte):
                # s is the weight of the current bit: 1/2, 1/4, 1/8, ...
                s /= 2.0
                if bit:
                    f += s
        return f

    def __eq__(self, other):
        if isinstance(other, Key):
            return self.hex() == other.hex()
        else:
            return False

    def __ne__(self, other):
        # bytes defines its own __ne__, which would otherwise be used instead
        # of the negation of __eq__ and make `==` and `!=` disagree.
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.hex())

    def __str__(self):
        """Return the key as a lowercase hexadecimal string."""
        return self.hex()


def choose_key(n):
    """Return a ``Key`` built from ``n`` bytes read from ``os.urandom``."""
    random_bytes = os.urandom(n)
    return Key(bytearray(random_bytes))


def xor_key(x: Key, y: Key):
    """Return the byte-wise XOR of ``x`` and ``y`` as a new ``Key``.

    The result has the length of ``x``; ``y`` is indexed at the same
    positions, so it must be at least as long as ``x``.
    """
    xored = bytearray()
    for position in range(len(x)):
        xored.append(x[position] ^ y[position])
    return Key(bytes(xored))


def key_from_base64_kbucket_id_optional(s: str):
    """Decode ``s`` with ``key_from_base64_kbucket_id``; return None when ``s`` is falsy."""
    if not s:
        return None
    return key_from_base64_kbucket_id(s)


def key_from_base64_kbucket_id(s: str):
    """Base64-decode ``s`` and wrap the resulting bytes in a ``Key``."""
    raw = base64.b64decode(s)
    return Key(raw)

## Useful helper functions

In [4]:
def bytes_to_bit_string(data: bytes) -> str:
    """Return ``data`` as a string of '0'/'1' characters, eight per byte."""
    bit_groups = [format(byte, '08b') for byte in data]
    return "".join(bit_groups)
  
def multihash_to_kad_id(peer_id: str) -> bytes:
    """Return the SHA-256 digest of the multihash decoded from ``peer_id``.

    ``peer_id`` is decoded with ``multihash.from_b58_string`` and the decoded
    value is then hashed with SHA-256.
    """
    decoded = mh.from_b58_string(peer_id)
    hasher = hl.sha256()
    hasher.update(decoded)
    return hasher.digest()
def xor_distance(bytes0: bytes, bytes1: bytes):
    """Return the byte-wise XOR of two byte strings of possibly different lengths.

    The shorter input is left-padded with zero bytes so that both operands
    are right-aligned; the result is as long as the longer input.
    """
    width = max(len(bytes0), len(bytes1))
    # bytes(n) yields n zero bytes: pad on the left up to the common width.
    padded0 = bytes(width - len(bytes0)) + bytes(bytes0)
    padded1 = bytes(width - len(bytes1)) + bytes(bytes1)
    return bytes(left ^ right for left, right in zip(padded0, padded1))

def bucket_number_for_distance(d: bytes) -> int:
    """Return the k-bucket number for the XOR distance ``d`` given as bytes.

    The result is the total number of bits in ``d`` minus its number of
    leading zero bits, i.e. the bit length of ``d`` read as a big-endian
    unsigned integer. An all-zero (or empty) distance yields 0.
    """
    return int.from_bytes(d, byteorder="big").bit_length()


##### TESTING METHODS #####

def bitstring_to_bytes(s):
    """Convert a string of '0'/'1' characters to big-endian bytes.

    Leading zero bytes are not preserved: only as many bytes as the numeric
    value needs are emitted, so a value of zero yields ``b''``.
    """
    remaining = int(s, 2)
    low_to_high = []
    while remaining != 0:
        # Peel off the least significant byte on each pass.
        remaining, low_byte = divmod(remaining, 256)
        low_to_high.append(low_byte)
    low_to_high.reverse()
    return bytes(low_to_high)

## XOR trie implementation taken from https://github.com/libp2p/py-libp2p-xor/blob/master/trie/trie.py

In [5]:
@dataclass
class Trie1:
    """Binary XOR trie storing keys at its leaves.

    A node is either a leaf (both branches are None), optionally holding one
    key, or an internal node with two child tries and no key. Keys must
    provide ``bit_at(offset)``; the bit at the current depth selects the
    branch to follow.
    """

    # (child for bit 0, child for bit 1); (None, None) on a leaf.
    branch: tuple
    # Key stored on a non-empty leaf, otherwise None. The annotation is a
    # string so the class definition does not require Key to exist yet.
    key: "Key"

    def __init__(self):
        self.branch = (None, None)
        self.key = None

    @staticmethod
    def _direction(key, depth):
        """Return 0 or 1: the branch index selected by ``key`` at ``depth``."""
        # Normalise to 0/1 so any truthy bit_at() result is a valid tuple index.
        return 1 if key.bit_at(depth) else 0

    def is_empty(self):
        """Return True when this node holds no key."""
        return not self.key

    def is_leaf(self):
        """Return True when this node has no children."""
        return not self.branch[0] and not self.branch[1]

    def is_empty_leaf(self):
        return self.is_empty() and self.is_leaf()

    def is_non_empty_leaf(self):
        return not self.is_empty() and self.is_leaf()

    def size(self):
        """Return the number of keys stored in this (sub-)trie."""
        return self.size_at_depth(0)

    def size_at_depth(self, depth):
        if self.is_leaf():
            return 0 if self.is_empty() else 1
        return self.branch[0].size_at_depth(depth + 1) + self.branch[1].size_at_depth(depth + 1)

    def add(self, key):
        """Insert ``key``; return ``(depth, added)``.

        ``added`` is False when the key was already present.
        """
        return self.add_at_depth(0, key)

    def add_at_depth(self, depth, key):
        if self.is_empty_leaf():
            self.key = key
            return depth, True
        if self.is_non_empty_leaf():
            if key == self.key:
                # key already in trie
                return depth, False
            # Split this leaf: push the resident key one level down, then
            # continue inserting the new key below.
            displaced = self.key
            self.key = None
            # type(self)() keeps the class self-contained whatever it is named.
            self.branch = (type(self)(), type(self)())
            self.branch[self._direction(displaced, depth)].key = displaced
        return self.branch[self._direction(key, depth)].add_at_depth(depth + 1, key)

    def remove(self, key):
        """Remove ``key``; return ``(depth, removed)``.

        ``removed`` is False when the key is not stored in the trie.
        """
        return self.remove_at_depth(0, key)

    def remove_at_depth(self, depth, key):
        if self.is_empty_leaf():
            return depth, False
        if self.is_non_empty_leaf():
            if not (key == self.key):
                # The leaf reached holds a different key: nothing to remove.
                return depth, False
            self.key = None
            return depth, True
        d, removed = self.branch[self._direction(key, depth)].remove_at_depth(depth + 1, key)
        if removed:
            self.shrink()
        return d, removed

    def find(self, key):
        """Follow ``key`` down to a leaf; return ``(stored_key_or_None, depth)``.

        The key returned is the one stored on the leaf reached, which is not
        necessarily equal to ``key``.
        """
        return self.find_at_depth(0, key)

    def find_at_depth(self, depth, key):
        if self.is_empty_leaf():
            return None, depth
        if self.is_non_empty_leaf():
            return self.key, depth
        return self.branch[self._direction(key, depth)].find_at_depth(depth + 1, key)

    def list_of_depths(self):
        """Return the depth of every stored key, branch 0 before branch 1."""
        return self.list_of_depths_at_depth(0)

    def list_of_depths_at_depth(self, depth):
        if self.is_empty_leaf():
            return []
        if self.is_non_empty_leaf():
            return [depth]
        l0 = self.branch[0].list_of_depths_at_depth(depth + 1)
        l1 = self.branch[1].list_of_depths_at_depth(depth + 1)
        return l0 + l1

    def shrink(self):
        """Collapse this node into a leaf when at most one child leaf holds a key."""
        b0, b1 = self.branch[0], self.branch[1]
        if b0.is_empty_leaf() and b1.is_empty_leaf():
            self.branch = (None, None)
        elif b0.is_empty_leaf() and b1.is_non_empty_leaf():
            self.key = b1.key
            self.branch = (None, None)
        elif b0.is_non_empty_leaf() and b1.is_empty_leaf():
            self.key = b0.key
            self.branch = (None, None)


# Later cells instantiate this class under the name `Trie`.
Trie = Trie1

## Peer class

In [6]:
class Peer(object):
    """Peer record: index, key, neighbors, buckets and liveness flag."""

    def __init__(self, index, key, neighbors, buckets, alive):
        # The index is not strictly necessary, but convenient when working
        # with Nebula Crawler.
        self.index, self.key = index, key
        # Neighbors are ordered from closest to farthest.
        # TODO: or the other way round?
        self.neighbors = neighbors
        # Bucket 0 contains the farthest peers (full); high buckets are empty.
        # TODO: dictionary?
        self.buckets = buckets
        self.alive = alive

    def distance(self, p):
        """Return ``xor_key`` of this peer's key and ``p``'s key."""
        other_key = p.key
        return xor_key(self.key, other_key)
        


## Nebula Crawler database queries

In [7]:
import psycopg2

# Connection string for the Nebula Crawler PostgreSQL database.
# Set the NEBULA_POSTGRES_DSN environment variable to supply real credentials
# without writing them into the notebook; the fallback is the local default.
postgres_connect_query = os.environ.get(
    "NEBULA_POSTGRES_DSN",
    "host=127.0.0.1 dbname=nebula user=nebula password=password",
)
# query to get all peers
get_peers_query = "select id,multi_hash from peers;"
# query to retrieve all neighbors relations between peers
get_neighbors_query = "select peer_id,neighbor_ids from neighbors where crawl_id=2;"

# indexes in the postgresql peers/neighbors db for multihashes and ids (topo id)
peers_id_col = 0       # for both peers and neighbors query results
peers_mh_col = 1       # for peers query results
neighbor_ids_col = 1   # for neighbors query results


## Query the Nebula DB

In [31]:
# setup connection to postgresql db
conn = psycopg2.connect(postgres_connect_query)
try:
    cur = conn.cursor()
    try:
        # query peers nebulaID and peerID
        cur.execute(get_peers_query)
        peer_list = {row[peers_id_col]: row[peers_mh_col] for row in cur.fetchall()}

        # query relations between peers (which peer is in which peer's routing table)
        cur.execute(get_neighbors_query)
        neighbors_relations = {node: neighbors for (node, neighbors) in cur.fetchall()}
    finally:
        cur.close()
finally:
    # Close explicitly so the database session is not left open by the kernel.
    conn.close()

# line format: nebula_id, peer_id, neighbor_1_nebula_id, neighbor_2_nebula_id, ... neighbor_n_nebula_id
# if no neighbor: nebula_id, peer_id
nebula_peers = [
    [nebula_id, peer_id] + neighbors_relations.get(nebula_id, [])
    for nebula_id, peer_id in peer_list.items()
]


## Save data to disk to access it without having to start Nebula

In [36]:
import csv

# CSV file the peer rows are written to and read back from in the cells below.
filename = "nebula-peers.csv"

In [34]:
# Write one CSV row per peer. newline='' lets the csv module control line
# endings itself, as its documentation requires for files passed to csv.writer.
with open(filename, 'w', newline='') as file:
    csvwriter = csv.writer(file)
    csvwriter.writerows(nebula_peers)

In [38]:
# Read the rows back; every field comes back as a string.
# newline='' lets the csv module handle line endings inside quoted fields.
with open(filename, 'r', newline='') as file:
    nebula_peers = list(csv.reader(file))

## Build the trie

In [56]:
class NebulaPeer:
    """Peer row from the Nebula data: ids, neighbor list and derived key."""

    def __init__(self, nebula_id, peer_id, neighbors):
        self.nebula_id, self.peer_id, self.neighbors = nebula_id, peer_id, neighbors

        # The key is derived from the peer id via multihash_to_kad_id.
        kad_id = multihash_to_kad_id(peer_id)
        self.key = Key(kad_id)

    def __str__(self):
        return (
            f"nebula_id: {self.nebula_id!s}, "
            f"peer_id: {self.peer_id!s}, "
            f"neighbors: {self.neighbors!s}"
        )

In [57]:
# Each row is: nebula_id, peer_id, then zero or more neighbor ids.
peers = [
    NebulaPeer(row[0], row[1], row[2:])
    for row in nebula_peers
]

# Defining a new Trie

Each node has references to 0 or 2 children and 1 parent. It contains its own key, size, (depth,) and reference to Peer object (containing the list of neighbors) for leaves.

The difference from the Python implementation above is that it supports depth skips, doesn't rely on depth, has bottom-up references (two-way parent-children links), and points to an object.

## Attributes
## parent
pointer to parent node: TrieNode
## children
pointers to 2 children (any,any)
for non-leaves only
## size
size(children_0)+size(children_1)
## peer
pointer to peer (for leaves only)
## key
key identifying the TrieNode

## Functions
### add(key,node)
Adds a node to the trie. Starting from the root, go down the trie for as long as the key matches. If the branch in the key's direction already has a link, it is a skip: create a fork at the appropriate level between the next hop and the new node. Otherwise, add a link to the newly created node.
### size()
returns the number of leaves in the (sub-)trie
the size computation takes O(n)
this should be an attribute and not a method
### find(key)
return True if key in trie

### closestN(key,n)
returns the n closest keys/TrieNode to given key

### intersection(trie)
to check intersection between global trie and local peer knowledge trie
maybe not very useful after all. global trie has all knowledge from local nodes by definition by Nebula crawler
### union(trie)
similar to intersection

# Peer
A node object should know (its depth in the trie), its neighbors (possibly ranked from closest to furthest, O(n)) organised in buckets (possibly, later, with the time each neighbor has been in the routing table), an alive/dead bool, its key, and its peer ID (missing peers observed in the trie).
A list of peers should be easy to iterate on

In [None]:
class Peer:
    """Sketch of a peer holding its key and its neighbors.

    NOTE(review): this reuses the name ``Peer`` already defined in an earlier
    cell; running this cell replaces that definition.
    """

    def __init__(self, key, neighbors):
        self.key = key
        # Keep the neighbors instead of silently dropping the argument.
        self.neighbors = neighbors

    # returns peer_id associated with self.key
    def peer_id(self) -> str:
        # TODO: not implemented yet; currently returns None.
        pass

    def alive(self) -> bool:
        # TODO: not implemented yet; currently returns None.
        return

In [66]:
class TrieNode:
    """Work-in-progress node of a binary trie with parent links.

    Attributes:
        key: key identifying this node (defaults to the empty string).
        parent: parent node, or None.
        children: pair of child nodes, each possibly None.
        peer: peer object attached to this node, or None.
        size: counter maintained by ``add`` / ``compute_size``.
    """

    def __init__(self, key="", parent=None, peer=None):
        self.key = key
        self.parent = parent
        self.children = (None, None)
        self.peer = peer
        self.size = 0

    def compute_size(self) -> int:
        """Set ``self.size`` to the sum of the existing children's sizes and return it."""
        self.size = sum(child.size for child in self.children if child is not None)
        return self.size

    def add(self, peer) -> bool:
        """Try to insert ``peer`` below this node.

        Returns:
            False when a child with exactly ``peer.key`` already exists;
            otherwise the result of delegating to the matching child.

        Raises:
            NotImplementedError: for the cases that are still TODO (the slot
                is empty, or the existing child does not share its whole key
                with ``peer.key``).
        """
        # The element of peer.key just past this node's key selects the slot.
        # int() accepts both '0'/'1' characters and 0/1 integers.
        # NOTE(review): assumes keys are sequences of bits — confirm.
        slot = int(peer.key[len(self.key)])
        child = self.children[slot]

        if child is None:
            # leaf
            # TODO: attach a new node for `peer` in this slot.
            raise NotImplementedError("inserting into an empty slot is not implemented yet")

        if peer.key[:len(child.key)] != child.key:
            # skip
            # not exactly, we have to check the depth of the next level
            # xor peer.key with children[1].key to decide where to fork
            # TODO
            raise NotImplementedError("forking on a skipped prefix is not implemented yet")

        if len(peer.key) == len(child.key):
            # child == peer, we cannot insert peer in the trie
            return False

        # peer should be a child of child
        success = child.add(peer)
        if success:
            self.size += 1
        return success
    

In [58]:
# Build a trie containing the key of every peer.
# NOTE(review): requires a class named `Trie` to be in scope when this cell runs.
trie = Trie()
for p in peers:
    trie.add(p.key)

In [64]:
# Print the largest value returned by trie.list_of_depths().
# max() raises ValueError if that list is empty.
print(max(trie.list_of_depths()))

31
