In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import os
from transaction_simulator import *

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# 1.) Load parameters

In [None]:
import sys
from datawand.parametrization import ParamHelper

In [None]:
# Parameter accessor built from '..', 'LNGraph' and the command-line arguments.
ph = ParamHelper('..', 'LNGraph', sys.argv)

# Every tunable value is read from the run configuration instead of being
# hard-coded, so the parameters in use do not depend on which cell ran last.
# Values that were hard-coded here during development:
#   amount_sat = 200000 (~15 USD), num_transactions = 2000
experiment_id = ph.get("experiment_id")
snapshot_id = ph.get("snapshot_id")
amount_sat = ph.get("amount_sat")
num_transactions = ph.get("num_transactions")

In [None]:
# (Re-)read the four run parameters from the ParamHelper in one unpacking step,
# so the configured values are the ones used by the rest of the notebook.
experiment_id, snapshot_id, amount_sat, num_transactions = (
    ph.get(key)
    for key in ("experiment_id", "snapshot_id", "amount_sat", "num_transactions")
)

In [None]:
# Root data directory comes from the run parameters.
data_dir = ph.get("data_dir")
# Results of this run go under <data_dir>/simulations/<snapshot_id>.
output_dir = "%s/simulations/%s" % (data_dir, snapshot_id)
print(output_dir)

In [None]:
# Create the output directory, including any missing parents. exist_ok=True makes
# the call a no-op when the directory already exists and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# CSV path named after the experiment id, located inside the output directory.
output_file = "%s/%s.csv" % (output_dir, experiment_id)

# 2.) Load data

In [None]:
# Load the edge table (directed_temporal_multi_edges.csv) from the data directory.
snapshots = pd.read_csv("%s/directed_temporal_multi_edges.csv" % data_dir)

In [None]:
# Load the node metadata table (node_meta_with_labels.csv) from the data directory.
node_meta = pd.read_csv("%s/node_meta_with_labels.csv" % data_dir)

In [None]:
# Collect every pub_key value from the node metadata; this list is what gets
# passed to the simulator as its provider argument.
providers = list(node_meta["pub_key"])
# Display the number of entries in the list.
len(providers)

In [None]:
# Preview the first rows of the loaded edge table.
snapshots.head()

In [None]:
# Restrict the edge table to the rows that belong to the configured snapshot.
edges = snapshots.loc[snapshots["snapshot_id"] == snapshot_id]

# 3.) Simulation

In [None]:
# Build the simulator from the snapshot's edges, the provider list, the
# transaction amount (amount_sat) and the number of transactions.
simulator = TransactionSimulator(edges, providers, amount_sat, num_transactions)

In [None]:
# Transactions held by the simulator; inspected in the histograms that follow.
transactions = simulator.transactions

In [None]:
# Histogram of how often each distinct "source" value occurs among the transactions.
transactions["source"].value_counts().hist()

In [None]:
# Histogram (50 bins) of how often each distinct "target" value occurs among the transactions.
transactions["target"].value_counts().hist(bins=50)

#### Reference timing: 10,000 transactions on 1 thread took about 7 minutes

In [None]:
%%time
# Run the simulation with weight="total_fee" and max_threads=8; it returns the
# shortest paths, the alternative paths and the router fees as three objects.
# NOTE(review): presumably "total_fee" is the cost minimised by routing — confirm in transaction_simulator.
shortest_paths, alternative_paths, all_router_fees = simulator.simulate(weight="total_fee", max_threads=8)

In [None]:
# Call the simulator's export with the output directory and keep its two return values.
# NOTE(review): presumably this also writes result files into output_dir — confirm in transaction_simulator.
total_income, total_fee = simulator.export(output_dir)

In [None]:
# Preview the first rows of the shortest-path results.
shortest_paths.head()

In [None]:
# Preview the first rows of the alternative-path results.
alternative_paths.head()

In [None]:
# Overlay the "length" distributions of the shortest and the alternative paths on
# one set of axes. Labels and a legend are required here: without them the two
# histograms cannot be told apart in the rendered figure.
ax = shortest_paths["length"].hist(label="shortest paths")
alternative_paths["length"].hist(ax=ax, alpha=0.6, label="alternative paths")
ax.set_xlabel("length")
ax.set_ylabel("count")
ax.legend();

harmonic_sums, routing_differences = calculate_node_influence(shortest_paths, alternative_paths)

harmonic_sums.reset_index().to_csv(output_file, index=False)

harmonic_sums["cost_diff"].mean()

harmonic_sums.head(10)

harmonic_sums.tail(10)

# 4.) Some stats

In [None]:
# Sum of the total_income values returned by the export step.
total_income.sum()

In [None]:
# Number of distinct values in the "node" column of the router fee table
# (dropna=False keeps the count equal to the length of .unique()).
num_router_nodes = all_router_fees["node"].nunique(dropna=False)

In [None]:
# Number of rows in the router fee table per node.
activity_cnt = all_router_fees["node"].value_counts()
# Nodes with more than 10 rows are treated as "active" routers.
active_routers = list(activity_cnt.loc[activity_cnt.gt(10)].index)

In [None]:
# Fraction of all router nodes that pass the activity threshold.
len(active_routers) / num_router_nodes

In [None]:
# Keep only the fee rows whose node is one of the active routers.
fee_subset = all_router_fees.loc[all_router_fees["node"].isin(active_routers)]

In [None]:
# One row per active router: "traffic" is its number of fee rows and "mean_fee"
# is the average of its "fee" values (aligned on the node index).
activity_df = (
    fee_subset["node"]
    .value_counts()
    .to_frame("traffic")
    .assign(mean_fee=fee_subset.groupby("node")["fee"].mean())
)

### Router nodes with high traffic do NOT charge higher fees!

In [None]:
# Spearman rank correlation between the columns of activity_df (traffic and mean_fee).
activity_df.corr(method="spearman")

# 5.) Analyze optimal routing fee for nodes

## a.) optimal routing fee for all nodes

In [None]:
# Compute the optimal base fees from the simulated paths and the router fees.
# The cells that follow read the node, opt_income, origi_income and income_gain
# columns of opt_fees_df and the node column of p_altered.
# NOTE(review): the exact contents of both results are defined in transaction_simulator.
opt_fees_df, p_altered = calc_optimal_base_fee(shortest_paths, alternative_paths, all_router_fees)

### Total income gain (optimal income minus original income)

In [None]:
# Sum over all rows of the difference between the opt_income and origi_income columns.
(opt_fees_df["opt_income"] - opt_fees_df["origi_income"]).sum()

### Income gain for the nodes with the highest original income

In [None]:
# Cut-offs k for the "first k entries of total_income" groups.
x = [10, 20, 50, 100, 200]
# For each k: mean income_gain over the rows of opt_fees_df whose node is among
# the first k index entries of total_income.
# NOTE(review): assumes total_income is sorted by income in descending order — confirm.
y = [
    opt_fees_df.loc[
        opt_fees_df["node"].isin(list(total_income[:k].index)), "income_gain"
    ].mean()
    for k in x
]

In [None]:
# Show the (k, mean income gain) pairs.
print(list(zip(x,y)))

## b.) Visualization of optimal routing fee calculation

#### top 5 routers

In [None]:
# Visualise the maximum-income calculation for the five nodes that occur most
# often in p_altered; the five return values are not needed here.
for n in p_altered["node"].value_counts().index[:5]:
    _, _, _, _, _ = calculate_max_income(
        n, p_altered, shortest_paths, all_router_fees, visualize=True
    )

#### 5 random routers

In [None]:
# Draw up to five *distinct* routers. p_altered may list the same node on several
# rows, so sampling rows directly could pick one router more than once, and asking
# for five rows raises when fewer are available.
# The fixed seed keeps the selection identical across Restart & Run All; change it
# to look at a different set of routers.
candidate_nodes = p_altered["node"].drop_duplicates()
sampled_nodes = candidate_nodes.sample(n=min(5, len(candidate_nodes)), random_state=42)
for n in sampled_nodes:
    # The five return values are not needed; only the plots drawn by visualize=True are.
    _, _, _, _, _ = calculate_max_income(n, p_altered, shortest_paths, all_router_fees, visualize=True)