In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import os
from transaction_simulator import *

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# 1.) Load parameters

In [None]:
import sys
from datawand.parametrization import ParamHelper

In [None]:
# Build the parameter helper from the command-line arguments.
# The individual run parameters (experiment_id, snapshot_id, amount_sat,
# num_transactions, ...) are read from it in the next cell; the hard-coded
# values that used to be assigned here were overwritten there before any use.
ph = ParamHelper('..', 'LNGraph', sys.argv)

In [None]:
# Read every run parameter from the ParamHelper, in the same lookup order as
# reading them one by one.
(
    experiment_id,
    snapshot_id,
    amount_sat,
    num_transactions,
    alpha,
    day_interval,
    drop_disabled,
) = [
    ph.get(param_name)
    for param_name in (
        "experiment_id",
        "snapshot_id",
        "amount_sat",
        "num_transactions",
        "alpha",
        "day_interval",
        "drop_disabled",
    )
]

# The experiment id gets a "__" suffix; it becomes part of the output directory name.
experiment_id = experiment_id + "__"

In [None]:
# Output location of this run: <data_dir>/simulations/<snapshot_id>/<experiment_id>.
data_dir = ph.get("data_dir")
output_dir = "%s/simulations/%s/%s" % (data_dir, snapshot_id, experiment_id)
print(output_dir)

In [None]:
# Switch for the incremental experiment: when True, the next cell looks for an
# opt_fees.csv in output_dir and, if present, loads it as a fee policy.
incremental_experiment = False  # set to True to enable

In [None]:
# Maps node -> opt_delta for the fee policy to apply; stays empty unless the
# incremental experiment is enabled and a policy file exists in output_dir.
opt_fee_changes = {}
if incremental_experiment:
    policy_file = output_dir+"/opt_fees.csv"
    if os.path.exists(policy_file):
        print("opt fee policy found!")
        # Results of the incremental run go to a separate directory
        # (experiment id with one more "_" appended).
        experiment_id = experiment_id + "_"
        output_dir = "%s/simulations/%s/%s" % (data_dir, snapshot_id, experiment_id)
        # Keep only the rows with a strictly positive delta.
        opt_fee_policy = pd.read_csv(policy_file)
        opt_fee_policy = opt_fee_policy.loc[opt_fee_policy["opt_delta"] > 0]
        opt_fee_changes = {
            node: delta
            for node, delta in zip(opt_fee_policy["node"], opt_fee_policy["opt_delta"])
        }

In [None]:
# Create the output directory (including parents); a directory that already
# exists is fine. exist_ok avoids the check-then-create race of testing
# os.path.exists() first.
os.makedirs(output_dir, exist_ok=True)

# 2.) Load data

In [None]:
# Load the directed temporal multi-edge table for the configured day interval.
snapshots = pd.read_csv("%s/directed_graphs/directed_temporal_multi_edges_%idays.csv" % (data_dir, day_interval))

In [None]:
if len(opt_fee_changes) > 0:
    # Add each node's fee delta to the base fee of the rows whose "trg" is that
    # node; rows whose "trg" has no entry in opt_fee_changes get +0.0.
    # Vectorised lookup instead of a row-wise apply(axis=1) lambda.
    # NOTE: this overwrites "fee_base_msat" in place, so re-running the cell
    # without reloading `snapshots` adds the deltas again.
    fee_delta = snapshots["trg"].map(opt_fee_changes).fillna(0.0)
    snapshots["fee_base_msat"] = snapshots["fee_base_msat"] + fee_delta

In [None]:
# Peek at the first two rows of the (possibly fee-adjusted) snapshot table.
snapshots.head(2)

In [None]:
# Load the labelled node metadata table from data_dir.
node_meta = pd.read_csv("%s/node_meta_with_labels.csv" % data_dir)

In [None]:
# Every public key listed in the node metadata is passed to the simulator as a provider.
providers = node_meta["pub_key"].tolist()
len(providers)

In [None]:
# First rows of the snapshot table.
snapshots.head()

In [None]:
# Restrict the edge table to the rows of the snapshot under study.
edges = snapshots.loc[snapshots["snapshot_id"] == snapshot_id]

In [None]:
# Mean, over "src" values, of the summed "capacity" of the rows sharing that "src".
edges.groupby("src")["capacity"].sum().mean()

In [None]:
# Median of the same per-"src" capacity totals.
edges.groupby("src")["capacity"].sum().median()

In [None]:
# Histogram of the per-"src" capacity totals, limited to [0, 10**8] with 50 bins.
edges.groupby("src")["capacity"].sum().hist(range=(0,10**8), bins=50)

# Summary statistics of the "capacity" column over the rows of `edges`
# (mean, minimum, maximum, median).
edges["capacity"].mean()

edges["capacity"].min()

edges["capacity"].max()

edges["capacity"].median()

# 3.) Simulation

In [None]:
# Build the simulator from the snapshot's edges, the provider list and the run parameters.
# NOTE(review): the meaning of alpha / drop_disabled is defined in transaction_simulator — confirm there.
simulator = TransactionSimulator(edges, providers, amount_sat, num_transactions, alpha=alpha, drop_disabled=drop_disabled)

In [None]:
# Transaction table held by the simulator; it has "source" and "target" columns (used below).
transactions = simulator.transactions

In [None]:
# Distribution of how many transactions each "source" value appears in.
transactions["source"].value_counts().hist()

In [None]:
# Distribution of how many transactions each "target" value appears in (50 bins).
transactions["target"].value_counts().hist(bins=50)

# Number of weakly connected components of the simulator's graph.
# NOTE(review): `nx` is not imported in any visible cell — presumably it arrives via
# `from transaction_simulator import *`; confirm.
nx.number_weakly_connected_components(simulator.G)

- 20K transactions, alpha=2.0, drop disabled: 2 min 21 s
- 20K transactions, alpha=2.0, drop disabled + fewer tmp targets: 1 min 29 s
- 20K transactions, alpha=2.0, drop disabled + fewer tmp targets + without cost_dict: 1 min 14 s

In [None]:
%%time
# Run the simulation using "total_fee" as the path weight and up to 8 threads.
# It returns three objects, used below as tables: shortest paths, alternative paths, router fees.
shortest_paths, alternative_paths, all_router_fees = simulator.simulate(weight="total_fee", max_threads=8)

In [None]:
# NOTE(review): `alma` is not defined in any visible cell, so this presumably raises
# NameError and acts as a manual stop for "Run All" — confirm, and remove it if the
# cells below are meant to run unattended.
alma

In [None]:
# Export the simulation results into output_dir; the call returns two objects,
# kept as total_income and total_fee.
total_income, total_fee = simulator.export(output_dir)

In [None]:
# First rows of the shortest-path table.
shortest_paths.head()

In [None]:
# First rows of the alternative-path table.
alternative_paths.head()

In [None]:
# Overlay the "length" distributions of shortest and alternative paths;
# the second histogram is drawn semi-transparent (alpha=0.6) on top of the first.
shortest_paths["length"].hist()
alternative_paths["length"].hist(alpha=0.6)

# Node-influence computation from the shortest and alternative path tables.
harmonic_sums, routing_differences = calculate_node_influence(shortest_paths, alternative_paths)

# NOTE(review): `output_file` is not defined in any visible cell — define it before running this.
harmonic_sums.reset_index().to_csv(output_file, index=False)

harmonic_sums["cost_diff"].mean()

harmonic_sums.head(10)

harmonic_sums.tail(10)

# 4.) Some stats

In [None]:
# Sum of total_income as returned by simulator.export.
total_income.sum()

In [None]:
# Number of distinct "node" values in the router-fee table.
num_router_nodes = len(all_router_fees["node"].unique())

In [None]:
# Rows per node in the router-fee table; a node counts as an active router
# when it has more than 10 rows.
activity_cnt = all_router_fees["node"].value_counts()
active_routers = activity_cnt.loc[activity_cnt > 10].index.tolist()

In [None]:
# Fraction of router nodes that are in active_routers.
len(active_routers) / num_router_nodes

In [None]:
# Router-fee rows that belong to the active routers only.
fee_subset = all_router_fees.loc[all_router_fees["node"].isin(active_routers)]

In [None]:
# One row per active router: "traffic" is its number of rows in fee_subset,
# "mean_fee" is the mean of its "fee" values (aligned on the node index).
activity_df = (
    fee_subset["node"]
    .value_counts()
    .to_frame("traffic")
    .assign(mean_fee=fee_subset.groupby("node")["fee"].mean())
)

### Router nodes with high traffic do NOT charge higher fees!

In [None]:
# Spearman rank correlation between the columns of activity_df ("traffic" vs. "mean_fee").
activity_df.corr(method="spearman")

# 5.) Analyze optimal routing fee for nodes

## a.) optimal routing fee for all nodes

In [None]:
# Compute the optimal base fees from the simulation outputs; the call returns two objects,
# kept as opt_fees_df and p_altered (both are used as tables with a "node" column below).
opt_fees_df, p_altered = calc_optimal_base_fee(shortest_paths, alternative_paths, all_router_fees)

In [None]:
# Save the optimal fees as opt_fees.csv in output_dir (without the index) — the same
# file name the incremental-experiment cell looks for.
opt_fees_df.to_csv("%s/opt_fees.csv" % output_dir, index=False)

### Total income revenue

In [None]:
# Per-node income change: "opt_income" minus "origi_income".
# This column is added after the CSV export above, so it is not part of opt_fees.csv.
opt_fees_df["income_diff"] = opt_fees_df["opt_income"] - opt_fees_df["origi_income"]

In [None]:
# Total income change summed over all rows of opt_fees_df.
opt_fees_df["income_diff"].sum()

In [None]:
# Rows whose income change exceeds 10000, indexed by node.
opt_fees_df[opt_fees_df["income_diff"]>10000].set_index("node")

### Income gain for nodes with most original income

# For each k, average the "income_gain" of the nodes found in the first k entries
# of total_income.
# NOTE(review): this presumes total_income is ordered by income (descending) — confirm.
x = [10, 20, 50, 100, 200]
y = []
for k in x:
    in_top_k = opt_fees_df["node"].isin(list(total_income[:k].index))
    y.append(opt_fees_df[in_top_k]["income_gain"].mean())

print(list(zip(x,y)))

## b.) Visualization of optimal routing fee calculation

#### Top 5 routers

# Visualize the max-income calculation for the 5 nodes with the most rows in p_altered;
# the five return values are discarded.
for n in list(p_altered["node"].value_counts()[:5].index):
    _, _, _, _, _ = calculate_max_income(n, p_altered, shortest_paths, all_router_fees, visualize=True)

#### 5 random routers

# Same visualization for the nodes of 5 randomly sampled rows of p_altered.
# No random_state is passed, so the selection differs between runs unless the
# global NumPy seed is fixed elsewhere.
for n in list(p_altered.sample(5)["node"]):
    _, _, _, _, _ = calculate_max_income(n, p_altered, shortest_paths, all_router_fees, visualize=True)

# Experimenting with shortest path search

# Graph used by the simulator.
G = simulator.G

# The three most frequent transaction targets.
pop_trgs = transactions["target"].value_counts()[:3].index

# All transactions that go to the single most frequent target.
most_pop_trans = transactions[transactions["target"]==pop_trgs[0]] 

len(most_pop_trans)

def simulate_origi():
    """Search a minimum-"total_fee" path for every transaction in `most_pop_trans`.

    Each search runs on the module-level graph `G`, from the transaction's
    "source" to the node named `<target>_trg`.

    Returns:
        list: one entry per transaction, in iteration order; the node list of
        the path, or `[]` when the search failed.

    Raises:
        RuntimeError: propagated unchanged from the path search.
    """
    paths = []
    for _, row in most_pop_trans.iterrows():
        try:
            p = nx.shortest_path(G, source=row["source"], target=row["target"] + "_trg", weight="total_fee")
        except RuntimeError:
            raise
        except Exception:
            # Best effort: any other search failure is recorded as an empty path.
            # Catching Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit through.
            p = []
        # Append only after a completed attempt. A `finally` here would also run
        # while RuntimeError propagates and touch an unbound or stale `p`.
        paths.append(p)
    return paths

def simulate_rev():
    """Same search as the forward variant, but run on the reversed graph.

    For every transaction in `most_pop_trans` the search goes from the node
    named `<target>_trg` to the transaction's "source" on `G.reverse(copy=False)`;
    the resulting node list is then reversed in place so it reads source -> target.

    Returns:
        list: one entry per transaction, in iteration order; the node list of
        the path, or `[]` when the search failed.

    Raises:
        RuntimeError: propagated unchanged from the path search.
    """
    G_rev = G.reverse(copy=False)
    paths = []
    for _, row in most_pop_trans.iterrows():
        try:
            p = nx.shortest_path(G_rev, target=row["source"], source=row["target"] + "_trg", weight="total_fee")
            p.reverse()
        except RuntimeError:
            raise
        except Exception:
            # Best effort: any other search failure is recorded as an empty path.
            # Catching Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit through.
            p = []
        # Append only after a completed attempt. A `finally` here would also run
        # while RuntimeError propagates and touch an unbound or stale `p`.
        paths.append(p)
    return paths

- 1426 simulations with the most popular node as target (fewer pseudo targets): 1.45 s
- 1379 simulations with the most popular node as target: 2.6 s
- 1379 simulations with the most popular node as source (duplicated): 56.8 s

%%time
# Time the forward search.
paths1 = simulate_origi()

%%time
# Time the search on the reversed graph.
paths2 = simulate_rev()

# Sanity check: both variants should give the same path for the last transaction.
paths2[-1] == paths1[-1]