'''
Created on 26.08.2018
@author: rpickhardt

lib_autopilot is a library which, based on a networkx graph, tries to
predict which channels should be added for a new node on the network. The
long term goal is to generate a lightning network with good topological
properties.

This library currently uses 4 heuristics to select channels and supports
two strategies for combining those heuristics:
1.) Diverse: tries to get nodes from every distribution
2.) Merge: builds the mixture distribution of the 4 heuristics

The library also estimates how much of the funds should be used for every
newly added channel. This is achieved by looking at the average channel
capacity of the suggested channel partners. A probability distribution which
is proportional to those capacities is created and smoothed with the uniform
distribution.

The 4 heuristics for channel partner suggestion are:
1.) Random: following the Erdos-Renyi model, nodes are drawn from a uniform
distribution
2.) Central: nodes are sampled from a distribution proportional to the
betweenness centrality of nodes
3.) Decrease Diameter: nodes are sampled from a distribution which favors
badly connected nodes
4.) Richness: nodes with high liquidity are selected and sampled from a
uniform distribution over those

The library is supposed to be extended by a simulation framework which can
be used to evaluate which strategies are useful in the long term. For this,
heavy computations (like centrality measures) might have to be reimplemented
in a more dynamic way.

Also it is important to understand that this program is not optimized to run
efficiently on large scale graphs with more than 100k nodes or on densely
connected graphs.

The program needs the following dependencies:
pip install networkx numpy
'''
"""
ideas:
* should we respect our own channel balances?
* respect node life time / uptime? or time of channels?
* include more statistics of the network:
* allow autopilots of various nodes to exchange some information
* exchange algorithms if the network grows.
* include better handling for duplicates and existing channels
* cap number of channels for well connected nodes.
* channel balance of automatic channels should not be more than 50% of
cummulative channel balance of destination node
next steps:
* test if the rankings from the heuristics are statistically independent
* evaluate / simulate which method produces graphs with desirable properties
"""
from operator import itemgetter
import logging
import math
import pickle
import networkx as nx
import numpy as np
class Strategy:
    # define constants; never changed as they are part of the API
    DIVERSE = "diverse"
    MERGE = "merge"
class Autopilot():
    def __init__(self, G):
        self.__add_logger()
        self.G = G

    def __add_logger(self):
        """ initiates the logging service for this class """
        # FIXME: adapt to the settings that are proper for you
        self.__logger = logging.getLogger('lib-autopilot')
        self.__logger.setLevel(logging.INFO)
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        self.__logger.addHandler(ch)
    def __sample_from_pdf(self, pdf, k=21):
        """
        helper function to quickly sample k distinct keys from a pdf encoded
        in a dictionary
        """
        if type(k) is not int:
            raise TypeError("__sample_from_pdf: k must be an integer variable")
        if k < 0 or k > 21000:
            raise ValueError("__sample_from_pdf: k must be between 0 and 21000")
        keys, v = zip(*list(pdf.items()))
        if k >= len(keys):
            return keys
        res = np.random.choice(keys, k, replace=False, p=v)
        return res
    def __sample_from_percentile(self, pdf, percentile=0.5, num_items=21):
        """
        only look at the most likely items and sample from those
        """
        if not percentile:
            return self.__sample_from_pdf(pdf, num_items)
        if type(percentile) is not float:
            raise TypeError("percentile must be a floating point variable")
        if percentile < 0 or percentile > 1:
            raise ValueError("percentile must be between 0 and 1")
        cumsum = 0
        used_pdf = {}
        for n, value in sorted(
                pdf.items(), key=itemgetter(1), reverse=True):
            cumsum += value
            used_pdf[n] = value
            if cumsum > percentile:
                break
        used_pdf = {k: v / cumsum for k, v in used_pdf.items()}
        return self.__sample_from_pdf(used_pdf, num_items)
    def __get_uniform_pdf(self):
        """
        Generates a uniform distribution over all nodes in the graph
        Unlike the other methods there are no arguments for smoothing or
        skewing since neither would change the uniform distribution
        """
        pdf = {n: 1 for n in self.G.nodes()}
        length = len(pdf)
        return {k: v / length for k, v in pdf.items()}
    def __get_centrality_pdf(self, skew=False, smooth=False):
        """
        produces a probability distribution proportional to the nodes'
        betweenness centrality scores
        The betweenness centrality counts on how many shortest paths a node
        lies. Connecting to such nodes will most likely make them even more
        central. However, it is good for the node running this operation,
        since that node itself gets a position in the network which is close
        to central nodes.
        This distribution can be skewed or smoothed.
        """
        self.__logger.info(
            "CENTRALITY_PDF: Try to generate a PDF proportional to centrality scores")
        pdf = {}
        cumsum = 0
        for n, score in nx.betweenness_centrality(self.G).items():
            pdf[n] = score
            cumsum += score
        # renormalize result
        pdf = {k: v / cumsum for k, v in pdf.items()}
        self.__logger.info(
            "CENTRALITY_PDF: Generated pdf")
        if skew and smooth:
            self.__logger.info(
                "CENTRALITY_PDF: Won't skew and smooth the distribution; ignoring both")
            smooth = False
            skew = False
        return self.__manipulate_pdf(pdf, skew, smooth)
    def __get_rich_nodes_pdf(self, skew=False, smooth=False):
        """
        Get a PDF proportional to the cumulative capacity of nodes
        The probability density function is calculated by looking at the
        cumulative capacity of all channels a node is part of.
        The pdf can be skewed by squaring the capacity based probabilities
        after deriving the pdf. If one wishes, it can also be smoothed by
        taking the mixture distribution with the uniform distribution.
        Skewing and smoothing are controlled via the arguments skew and smooth
        """
        self.__logger.info(
            "RICH_PDF: Try to retrieve a PDF proportional to capacities")
        rich_nodes = {}
        network_capacity = 0
        for n in self.G.nodes():
            total_capacity = sum(
                self.G.get_edge_data(
                    n, m)["satoshis"] for m in self.G.neighbors(n))
            network_capacity += total_capacity
            rich_nodes[n] = total_capacity
        rich_nodes = {k: v / network_capacity for k, v in rich_nodes.items()}
        self.__logger.info(
            "RICH_PDF: Generated a PDF proportional to capacities")
        if skew and smooth:
            self.__logger.info(
                "RICH_PDF: Can't skew and smooth the distribution; ignoring both")
            smooth = False
            skew = False
        return self.__manipulate_pdf(rich_nodes, skew, smooth)
    def __get_long_path_pdf(self, skew=True, smooth=False):
        """
        A probability distribution in which badly connected nodes are likely
        This method looks at all pairs shortest paths, takes the sum of all
        path lengths for each node and derives a probability distribution
        from those sums. The idea of this method is to find nodes which are
        increasing the diameter of the network.
        The method will by default skew the pdf by squaring the probabilities
        derived from the sums of path lengths. If one wishes, the method can
        also be smoothed by taking the mixture distribution with the uniform
        distribution.
        Skewing and smoothing are controlled via the arguments skew and smooth
        """
        if skew and smooth:
            self.__logger.info(
                "DECREASE DIAMETER: Can't skew and smooth the distribution; ignoring smoothing")
            smooth = False
        path_pdf = {}
        self.__logger.info(
            "DECREASE DIAMETER: Generating probability density function")
        all_pair_shortest_path_lengths = nx.shortest_path_length(self.G)
        for node, paths in all_pair_shortest_path_lengths:
            path_sum = sum(length for _, length in paths.items())
            path_pdf[node] = path_sum
        s = sum(path_pdf.values())
        path_pdf = {k: v / s for k, v in path_pdf.items()}
        self.__logger.info(
            "DECREASE DIAMETER: probability density function created")
        path_pdf = self.__manipulate_pdf(path_pdf, skew, smooth)
        return path_pdf
    def __manipulate_pdf(self, pdf, skew=True, smooth=False):
        """
        helper function to skew or smooth a probability distribution
        skewing is achieved by squaring the probabilities and renormalizing
        smoothing is achieved by taking the mixture distribution with the
        uniform distribution
        smoothing and skewing are not inverse to each other but should also
        not happen at the same time. The method will however not prevent this
        """
        if not skew and not smooth:  # nothing to do
            return pdf
        length = len(pdf)
        if skew:
            self.__logger.info(
                "manipulate_pdf: Skewing the probability density function")
            pdf = {k: v ** 2 for k, v in pdf.items()}
            s = sum(pdf.values())
            pdf = {k: v / s for k, v in pdf.items()}
        if smooth:
            self.__logger.info(
                "manipulate_pdf: Smoothing the probability density function")
            pdf = {k: 0.5 * v + 0.5 / length for k, v in pdf.items()}
        return pdf
    def __create_pdfs(self):
        res = {}
        res["path"] = self.__get_long_path_pdf()
        res["centrality"] = self.__get_centrality_pdf()
        res["rich"] = self.__get_rich_nodes_pdf()
        res["uniform"] = self.__get_uniform_pdf()
        return res
    def calculate_statistics(self, candidates):
        """
        computes statistics of the candidate set about connectivity and
        wealth and returns a probability density function (pdf) which encodes
        which percentage of the funds should be used for the channel with
        each candidate node
        The pdf is proportional to the average channel balance of each
        candidate and smoothed with a uniform distribution. Currently the
        smoothing is just a weighted arithmetic mean with a weight of 0.3 for
        the uniform distribution.
        """
        pdf = {}
        for candidate in candidates:
            neighbors = list(self.G.neighbors(candidate))
            capacity = sum([self.G.get_edge_data(candidate, n)
                            ["satoshis"] for n in neighbors])
            average = capacity / (1 + len(neighbors))
            pdf[candidate] = average
        cumsum = sum(pdf.values())
        pdf = {k: v / cumsum for k, v in pdf.items()}
        w = 0.7
        print("percentage smoothed_percentage capacity numchannels alias")
        print("----------------------------------------------------------------------")
        res_pdf = {}
        for k, v in pdf.items():
            neighbors = list(self.G.neighbors(k))
            capacity = sum([self.G.get_edge_data(k, n)["satoshis"]
                            for n in neighbors])
            name = k
            if "alias" in self.G.nodes[k]:
                name = self.G.nodes[k]["alias"]
            print("{:12.2f} ".format(100 * v),
                  "{:12.2f} ".format(
                      100 * (w * v + (1 - w) / len(candidates))),
                  "{:10} {:10} ".format(capacity,
                                        len(neighbors)),
                  name)
            res_pdf[k] = (w * v + (1 - w) / len(candidates))
        return res_pdf
    def calculate_proposed_channel_capacities(self, pdf, balance=1000000):
        minimal_channel_balance = 20000  # lnd uses 20k satoshi which seems reasonable
        min_probability = min(pdf.values())
        needed_total_balance = math.ceil(
            minimal_channel_balance / min_probability)
        self.__logger.info(
            "Need at least a balance of {} satoshi to open {} channels".format(
                needed_total_balance, len(pdf)))
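        # worked example: if the least likely candidate has probability 0.02,
        # then ceil(20000 / 0.02) = 1,000,000 satoshi are needed so that even
        # this candidate's channel reaches the minimal channel balance; if the
        # available balance is smaller, that candidate is dropped in the loop
        # below and the pdf is renormalized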
        while needed_total_balance > balance and len(pdf) > 1:
            min_val = min(pdf.values())
            k = [k for k, v in pdf.items() if v == min_val][0]
            self.__logger.info(
                "Not enough balance to open {} channels. Removing node {} and rebalancing the pdf for channel balances".format(
                    len(pdf), k))
            del pdf[k]
            s = sum(pdf.values())
            pdf = {k: v / s for k, v in pdf.items()}
            min_probability = min(pdf.values())
            needed_total_balance = math.ceil(
                minimal_channel_balance / min_probability)
            self.__logger.info(
                "Need at least a balance of {} satoshi to open {} channels".format(
                    needed_total_balance, len(pdf)))
        return pdf
    def find_candidates(self, num_items=21, strategy=Strategy.DIVERSE,
                        percentile=None):
        """
        Generates candidates with several strategies
        """
        self.__logger.info("running the autopilot on a graph with {} nodes and {} edges.".format(
            len(self.G.nodes()), len(self.G.edges())))
        sub_k = math.ceil(num_items / 4)
        self.__logger.info(
            "GENERATE CANDIDATES: Try to generate up to {} nodes with 4 strategies: (random, central, network improvement, liquidity)".format(num_items))
        # FIXME: should remember from where nodes are known
        res = self.__create_pdfs()
        candidates = set()
        # FIXME: Run simulations to decide the following problem:
        """
        We can either do a global sampling by merging all probability
        distributions and sampling once from the result, or we can sample
        from each probability distribution and merge the results. These
        processes are obviously not commutative and we need to check which
        one seems more reasonable.
        My (renepickhardt) gut feeling says that several samples which are
        merged give the best of all worlds, whereas the other method would
        probably result in something that is either pretty uniform or
        dominated by one very skewed distribution. As mentioned, this needs
        to be tested.
        """
        if strategy == Strategy.DIVERSE:
            for name, pdf in res.items():
                tmp = self.__sample_from_percentile(pdf, percentile, sub_k)
                candidates = candidates.union(set(tmp))
        elif strategy == Strategy.MERGE:
            merged = {}
            denominator = len(res)
            for pdf in res.values():
                for k, v in pdf.items():
                    if k not in merged:
                        merged[k] = v / denominator
                    else:
                        merged[k] += v / denominator
            candidates = self.__sample_from_percentile(merged, percentile,
                                                       num_items)
        """
        the following code prints a list of candidates for debugging
        for k in res:
            if "alias" in self.G.nodes[key[k]]:
                print(pdf[key[k]], self.G.nodes[key[k]]["alias"])
        """
        if len(candidates) > num_items:
            candidates = np.random.choice(list(candidates), num_items, replace=False)
        self.__logger.info(
            "GENERATE CANDIDATES: Found {} nodes with which channel creation is suggested".format(
                len(candidates)))
        return candidates
if __name__ == '__main__':
    print("This lib needs to be given a network graph, so you need to create a wrapper")