# Python Binary Trie
GitHub: [py-binary-trie](https://github.com/guillaumemichel/py-binary-trie)

PyPI: [binary-trie](https://pypi.org/project/binary-trie/)

Install with `pip install binary-trie`

In [2]:
from binary_trie import Trie, bytes_to_bitstring, bitstring_to_bytes, int_to_bitstring, bitstring_to_int

In [3]:
# Start from an empty trie and add four keys, each one written a different way.
trie = Trie()
# Key given directly as a bitstring literal.
trie.add("0010")
# Key built by string repetition: "0000".
trie.add(4*"0")
# Key derived from the integer 3 with l=4
# (presumably `l` is the bitstring length -- confirm in the binary-trie docs).
trie.add(int_to_bitstring(3, l=4))
# Key derived from the single byte 0x0e with l=4
# (presumably limited to 4 bits -- confirm in the binary-trie docs).
trie.add(bytes_to_bitstring(b'\x0e', l=4))

True

In [4]:
trie.find("0010") # True

True

In [5]:
trie.find("0100") # False

False

## Building a basic trie

In [6]:
# Fresh trie for the small worked example.
t = Trie()

# Convert every node ID with int_to_bitstring(i, 4) and add the result as a key
# (the 4 is presumably the bitstring length -- confirm in the binary-trie docs).
nodeIDs = [2,3,4,6,7,9,11,13]
for i in nodeIDs:
    t.add(int_to_bitstring(i, 4))

<img title="Binary trie of the 4-bit node IDs" alt="Binary trie containing the keys 0010, 0011, 0100, 0110, 0111, 1001, 1011 and 1101" src="./trie.svg">

In [8]:
t.find("0011")
t.find("0000")

False

In [7]:
t.n_closest("0101", 3)

['0100', '0111', '0110']

In [8]:
t.n_closest("0110", 8)

['0110', '0111', '0100', '0010', '0011', '1101', '1011', '1001']

## IPFS nodes

In [9]:
import csv

# Read the whole peer dump into memory as a list of rows (each row is a list
# of strings).
filename = "nebula-peers.csv"
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for file objects passed to csv.reader.
with open(filename, 'r', newline='') as file:
    nebula_peers = list(csv.reader(file))

In [10]:
import multihash as mh
import hashlib as hl  

def multihash_to_kad_id(peer_id: str) -> bytes:
    """Return the SHA-256 digest of the multihash decoded from *peer_id*.

    ``peer_id`` is decoded with ``multihash.from_b58_string`` and the decoded
    value is hashed; the raw digest bytes are returned.
    """
    hasher = hl.sha256()
    hasher.update(mh.from_b58_string(peer_id))
    return hasher.digest()

# XOR of two byte strings that are aligned on their last byte.
def xor_distance(bytes0: bytes, bytes1: bytes):
    """Return the byte-wise XOR of *bytes0* and *bytes1*.

    When the inputs differ in length, the shorter one is treated as if it were
    left-padded with zero bytes, so the result always has the length of the
    longer input.
    """
    width = max(len(bytes0), len(bytes1))
    # Left-pad with zero bytes so that both operands have the same width.
    left = bytes0.rjust(width, b"\x00")
    right = bytes1.rjust(width, b"\x00")
    return bytes(a ^ b for a, b in zip(left, right))

def bit_string_to_bytes(s):
    """Convert a string of binary digits to its minimal big-endian bytes.

    The string is parsed with ``int(s, 2)`` and the resulting integer is
    encoded on as few bytes as possible, so leading zero bits do not produce
    leading zero bytes and a value of zero yields ``b""``.

    Args:
        s: text accepted by ``int(s, 2)``, e.g. ``"0010"``.

    Returns:
        The big-endian encoding of the parsed integer, without leading zero
        bytes.

    Raises:
        ValueError: if ``s`` is not a valid base-2 literal, or if it denotes a
            negative number (``int`` accepts a leading minus sign).
    """
    v = int(s, 2)
    if v < 0:
        # Right-shifting a negative int never reaches 0, so a shift-until-zero
        # loop would never terminate; reject the input explicitly instead.
        raise ValueError(f"bit string must not be negative: {s!r}")
    return v.to_bytes((v.bit_length() + 7) // 8, "big")

# get the corresponding k-bucket for the given XOR distance in bytes
def bucket_number_for_distance(d: bytes) -> int:
    """Return 256 minus the number of significant bits in *d*.

    ``d`` is read as a big-endian unsigned integer, so its leading zero bits
    are not counted. An all-zero (or empty) ``d`` gives 256, and every
    additional significant bit lowers the result by one; a 32-byte ``d`` whose
    top bit is set gives 0.
    """
    significant_bits = int.from_bytes(d, "big").bit_length()
    return 256 - significant_bits


class NebulaPeer:
    """One peer from the crawl, with its neighbor IDs and per-distance buckets.

    Attributes:
        nebula_id: identifier given to the constructor.
        peer_id: peer ID given to the constructor.
        neighbors_ids: neighbor identifiers given to the constructor.
        key: result of ``multihash_to_kad_id(peer_id)``.
        alive: True when ``neighbors_ids`` is non-empty.
        buckets: 257 lists, indexed by ``bucket_number_for_distance``.
        neighbors: peers registered through ``addNeighbor``, in call order.
    """

    def __init__(self, nebula_id, peer_id, neighbors_ids):
        self.nebula_id, self.peer_id = nebula_id, peer_id
        self.neighbors_ids = neighbors_ids

        self.key = multihash_to_kad_id(peer_id)

        # A peer without any listed neighbor is flagged as not alive.
        self.alive = len(neighbors_ids) != 0

        # One (initially empty) bucket per possible bucket number.
        self.buckets = [[] for _ in range(257)]
        self.neighbors = []

    def distance(self, p):
        """Return the XOR distance between this peer's key and ``p.key``."""
        return xor_distance(self.key, p.key)

    def addNeighbor(self, peer):
        """Record *peer* as a neighbor and file it in the matching bucket."""
        self.neighbors.append(peer)
        bucket_index = bucket_number_for_distance(self.distance(peer))
        self.buckets[bucket_index].append(peer)

    def __str__(self):
        return (
            f"nebula_id: {self.nebula_id!s}, "
            f"peer_id: {self.peer_id!s}, "
            f"neighbors: {self.neighbors_ids!s}"
        )

In [11]:
# Index the peers by the first CSV column. For each row, column 0 is passed as
# the nebula ID, column 1 as the peer ID, and all remaining columns as the
# neighbor IDs.
peers = {line[0]: NebulaPeer(line[0], line[1], line[2:]) for line in nebula_peers}

In [12]:
print(len(peers))

30453


In [15]:
# Rebind `t` to a fresh trie and add one entry per peer: the key is the
# bitstring form of the peer's `key`, and the peer object itself is passed as
# the entry's metadata.
t = Trie()
for p in peers:
    t.add(bytes_to_bitstring(peers[p].key), metadata=peers[p])

In [20]:
# Target key, written out as a bitstring.
key="1111111111111100000111001011001111111110101001111001010111011011110010000111111100000100011100000000110110010001100101010100010000010010001100101000100111011100100101101110101011000011011111111101011001101001101111010011000001000010011001011010100001101100"
# Ask the trie for the 21 entries closest to `key`, then drop the first one
# (presumably the nearest match, e.g. the key itself -- confirm).
t.n_closest(key, 21)[1:]

False