In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import os, sys
sys.path.insert(0,'./python/')
from analysis_utils import *

In [None]:
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
sns.set(font_scale = 2)
sns.set_style("whitegrid")

In [None]:
# Read the run parameters of this notebook through the datawand ParamHelper.
import sys
from datawand.parametrization import ParamHelper
# NOTE(review): presumably "../" is the pipeline root and "LNGraph" the pipeline name — confirm against datawand
ph = ParamHelper("../","LNGraph",sys.argv)

# NOTE(review): these three names are re-assigned with hard-coded values in a later cell,
# so the parameters fetched here never reach the data-loading code.
experiment_id = ph.get("sim_res_dir")
snapshots = ph.get("snapshots")
simulation_dir = ph.get("sim_root_dir")

# 1. Load data

In [None]:
node_names = pd.read_csv("/mnt/idms/fberes/data/bitcoin_ln_research/node_names.csv")

In [None]:
node_names.head()

In [None]:
# Public keys of every node whose "is_lnbig" flag is set in the node-name table.
is_lnbig_mask = node_names["is_lnbig"]
LNBIG_nodes = node_names.loc[is_lnbig_mask, "pub_key"].tolist()
len(LNBIG_nodes)

In [None]:
node_names = node_names[["name","pub_key"]]

#experiment_id = ph.get("sim_dir")
experiment_id = "2019-09-06_22:03:19_50000sat_k6000"
snapshots = range(40)#range(54)
simulation_dir = "/mnt/idms/fberes/data/bitcoin_ln_research/simulations_1days/"

experiment_id = "60000sat_k7000_eps0.80_wdepFalse"
snapshots = range(7)
simulation_dir = "/mnt/idms/fberes/data/bitcoin_ln_research/simulations_fee_opt_7days//"

experiment_id = "60000sat_k7000_eps0.80_wdepTrue"
snapshots = range(7)
simulation_dir = "/mnt/idms/fberes/data/bitcoin_ln_research/simulations_fee_opt_7days//"

experiment_id = "60000sat_k7000_eps0.80"
snapshots = range(40)
simulation_dir = "/mnt/idms/fberes/data/bitcoin_ln_research/simulations_fee_opt_again_1days/"

experiment_id = "60000sat_k7000_eps0.80"
snapshots = range(40)
simulation_dir = "/mnt/idms/fberes/data/bitcoin_ln_research/simulations_exclusion_1days//"

In [None]:
# Hard-coded experiment selection used by the loading cells below.
# NOTE(review): this overrides the experiment_id / snapshots / simulation_dir values
# read through ParamHelper earlier in the notebook — confirm that is intended.
experiment_id = "60000sat_k7000_eps0.80_wdepTrue_corrected"
snapshots = range(40)
simulation_dir = "/mnt/idms/fberes/data/bitcoin_ln_research/simulations_depletions_1days/"

In [None]:
experiment_folders = get_experiment_files(experiment_id, snapshots, simulation_dir)

In [None]:
pricing_pol = load_data(experiment_folders, snapshots, "opt_fees")

In [None]:
source_fee = load_data(experiment_folders, snapshots, "source_fees")

In [None]:
router_income = load_data(experiment_folders, snapshots, "router_incomes")

In [None]:
node_depletions = load_data(experiment_folders, snapshots, "node_depletions")

In [None]:
depletions_df = pd.concat(node_depletions)

In [None]:
# Discard the two *_deps columns; the frame is rebound rather than mutated in place.
depletions_df = depletions_df.drop(columns=["inbound_deps","outbound_deps"])

In [None]:
depletions_df.head()

In [None]:
mean_depletions = depletions_df.groupby("node")[["num_inbound","num_outbound"]].mean().reset_index()

In [None]:
mean_depletions.head()

In [None]:
mean_depletions = mean_depletions.merge(node_names, left_on="node", right_on="pub_key").drop("pub_key", axis=1)

In [None]:
mean_depletions = mean_depletions.sort_values("num_outbound",ascending=False)

In [None]:
mean_depletions.to_csv("/mnt/idms/fberes/data/bitcoin_ln_research/results/both_node_depletions.csv", index=False)

In [None]:
mean_depletions[["name","num_outbound"]].head(20)

In [None]:
mean_depletions.sort_values("num_inbound",ascending=False)[["name","num_inbound"]].head(20)

global_failure_ratio = load_data(experiment_folders, snapshots, "global_failure_ratios")

global_failure_ratios = pd.concat(global_failure_ratio)

mean_failure_ratio = global_failure_ratios.groupby("entity")["failure_ratio"].mean().sort_values(ascending=False).reset_index()

mean_failure_ratio.to_csv("/mnt/idms/fberes/data/bitcoin_ln_research/results/entity_global_failure_ratios.csv", index=False)

## Router incomes

In [None]:
router_income_col = "fee"#"income"

In [None]:
x = snapshots
y = [router_income[i][router_income_col].mean() for i in snapshots]
plt.plot(x,y,"bo")

In [None]:
sns.jointplot(data=pd.concat(router_income), x="snapshot_id", y=router_income_col)

In [None]:
all_router_incomes = pd.concat(router_income)

## Source fees

In [None]:
x = snapshots
y = [source_fee[i]["mean_fee"].mean() for i in snapshots]
plt.plot(x,y, "bo")

In [None]:
sns.jointplot(data=pd.concat(source_fee), x="snapshot_id", y="mean_fee")

## Optimal pricing

x = snapshots
y = [pricing_pol[i]["opt_delta"].mean() for i in snapshots]
plt.plot(x,y, "bo")

x = snapshots
y = [pricing_pol[i]["opt_traffic"].mean() for i in snapshots]
plt.plot(x,y, "bo")

x = snapshots
y = [pricing_pol[i]["origi_income"].mean() for i in snapshots]
plt.plot(x,y, "bo")

x = snapshots
y = [pricing_pol[i]["opt_income"].mean() for i in snapshots]
plt.plot(x,y, "bo")

### Calculate income revenue

for df in pricing_pol:
    df["income_diff"] = df["opt_income"] - df["origi_income"]

# 2. Simulation stability

corrs = ["pearson","spearman","kendall","wkendall"]

## a.) Stability of basic node statistics (moved to script)

- stability could be improved by running more independent experiments
- this time (using channel depletions) the experiment is less stable?

## b.) Stability of optimal pricing results

origi_inc = pd.DataFrame([avg_cross_corr(pricing_pol, snap_id, "origi_income") for snap_id in snapshots])
opt_inc = pd.DataFrame([avg_cross_corr(pricing_pol, snap_id, "opt_income") for snap_id in snapshots])
income_diff = pd.DataFrame([avg_cross_corr(pricing_pol, snap_id, "income_diff") for snap_id in snapshots])
opt_delta = pd.DataFrame([avg_cross_corr(pricing_pol, snap_id, "opt_delta") for snap_id in snapshots])

colors = ['b','g','r','m']
fig, axis = plt.subplots(1,3,figsize=(15,4))
x = snapshots
# One entry per panel: (title, [(frame, line style, legend suffix), ...]).
panel_specs = [
    ("INCOME mean cross correlations", [(origi_inc, '-', "_orig"), (opt_inc, '--', "_opt")]),
    ("INCOME_DIFF mean cross correlations", [(income_diff, '-', "")]),
    ("OPT_DELTA mean cross correlations", [(opt_delta, '-', "")]),
]
for panel, (panel_title, curves) in zip(axis, panel_specs):
    panel.set_title(panel_title)
    # every correlation type keeps its own colour across all panels
    for color_idx, corr_name in enumerate(corrs):
        for frame, line_style, suffix in curves:
            panel.plot(x, frame[corr_name], colors[color_idx] + line_style, label=corr_name + suffix)
    panel.set_xlabel("snapshot")
    panel.legend()
plt.show()

#### Incomes observations

- taking mean cross correlation of user incomes (original and optimal)
- weighted kendall-tau decreased to 0.90 from 0.95 (after including capacity maintenance)
- in case of unweighted spearman and kendall the optimal income correlates better across samples 

#### income_diff observations

- taking mean cross correlation of income difference (optimal income - original income)
- weighted kendall-tau decreased to 0.80 from 0.93 (after including capacity maintenance)

#### opt_delta observations

- taking mean cross correlation of optimal base fee change
- weighted kendall-tau decreased to 0.70 from 0.90 (after including capacity maintenance)

**CONCLUSION: our base_fee optimization procedure is less efficient (meaningful) after the implementation of capacity maintenance!!!**

# 3. Income revenue analysis

In [None]:
node_names = pd.read_csv("/mnt/idms/fberes/data/bitcoin_ln_research/node_names.csv")
print(node_names.head())

### Average stats (over all snapshots and samples)

In [None]:
all_records = pd.concat([pricing_pol[i] for i in snapshots])

In [None]:
# Average every numeric statistic per node over all snapshots and samples.
# The bookkeeping columns are removed before averaging, and numeric_only=True keeps the
# reduction working on pandas >= 2.0, where non-numeric columns are no longer skipped silently.
average_stats_all = (
    all_records
    .drop(columns=["sample","snapshot_id"])
    .groupby("node")
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
average_stats_all = average_stats_all.merge(node_names, left_on="node", right_on="pub_key", how="left").drop("pub_key", axis=1).set_index("node")

In [None]:
average_stats_all = average_stats_all.rename({"opt_traffic":"opt_traffic_ratio"}, axis=1)

In [None]:
from ln_utils import corr_mx

results_dir = "/mnt/idms/fberes/data/bitcoin_ln_research/results/opt_base_fee/"
cmap = "RdBu_r"
fig_s=(10,10)

def plot_pricing_policy_corrs(corr_type):
    """Draw an annotated heatmap of the correlations between the pricing-policy statistics.

    Parameters
    ----------
    corr_type : str
        Correlation method name, forwarded to ``corr_mx`` as ``method``
        (the notebook calls this with "spearman", "kendall" and "wkendall").

    Notes
    -----
    Reads the notebook-level ``average_stats_all``, ``fig_s`` and ``cmap``.
    Returns nothing; the figure is rendered inline.
    """
    stat_cols = ["total_income","alt_income","alt_traffic","total_traffic","opt_income","opt_delta","opt_traffic_ratio","failed_traffic_ratio"]
    # named corr_values so the notebook-level ``corrs`` list is not shadowed
    corr_values = corr_mx(average_stats_all[stat_cols], method=corr_type)
    fig, ax = plt.subplots(figsize=fig_s)
    sns.heatmap(corr_values, ax=ax, cmap=cmap, square=True, annot=True, fmt='.1f')
    # Pin the y-range to one unit per statistic (previously a literal 8.0).
    # NOTE(review): presumably a workaround for clipped first/last heatmap rows in some
    # matplotlib releases, and assumes corr_mx returns one row per input column — confirm.
    ax.set_ylim(len(stat_cols), 0)
    # the function is called once per correlation type, so label each figure
    ax.set_title("Pricing policy statistics: %s correlation" % corr_type)
    #plt.savefig("%s/pricing_policy_%s.pdf" % (results_dir, corr_type), format="pdf", bbox_inches='tight')

In [None]:
for corr_type in ["spearman","kendall","wkendall"]:
    plot_pricing_policy_corrs(corr_type)

# Categorization

In [None]:
x_label = "daily routing income groups"

In [None]:
def cut_into_categories(df, bounds=(10, 20, 50, 100), label=None):
    """Split ``df`` into consecutive row-position groups and tag each with a 1-based group id.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose current row order defines the groups (sort it beforehand).
    bounds : sequence of int, optional
        Row positions at which a new group starts. The default reproduces the
        original groups ``[:10]``, ``[10:20]``, ``[20:50]``, ``[50:100]``, ``[100:]``.
    label : str, optional
        Name of the group-id column; defaults to the notebook-level ``x_label``.

    Returns
    -------
    list of pandas.DataFrame
        ``len(bounds) + 1`` independent frames, each carrying the extra ``label``
        column. Groups that start past the end of ``df`` are empty. ``df`` itself
        is never modified.
    """
    if label is None:
        label = x_label
    edges = [0] + list(bounds) + [None]
    groups = []
    for group_id, (start, stop) in enumerate(zip(edges[:-1], edges[1:]), start=1):
        # .assign() builds a new frame, so nothing is written into a slice of ``df``
        # (the former ``slice[col] = value`` pattern triggers SettingWithCopyWarning).
        groups.append(df.iloc[start:stop].assign(**{label: group_id}))
    return groups

In [None]:
x = range(5)

## Sort by total routing income (for binning into groups)

In [None]:
average_stats_all = average_stats_all.sort_values("total_income", ascending=False)

average_stats_all = average_stats_all.sort_values("total_traffic", ascending=False)

In [None]:
reame_dict = {
    "failed_traffic_ratio":"failed traffic ratio",
    "income_diff":"daily income gain (satoshi)",
    "opt_delta":r"$\beta^*$ base fee increment (satoshi)"
}
average_stats_all = average_stats_all.rename(reame_dict, axis=1)

In [None]:
len(average_stats_all)

In [None]:
average_stats_all.reset_index(drop=True)[["name",reame_dict["income_diff"],"alt_traffic",reame_dict["failed_traffic_ratio"]]].head(20)

In [None]:
average_stats_all_cats = cut_into_categories(average_stats_all)
average_stats_all_with_groups = pd.concat(average_stats_all_cats)

In [None]:
def bar_plot_for_groups(col, is_log_yscale=True, ci=95, ylim=False):
    """Bar plot of one statistic per income group, saved as ``groups_<col>.pdf`` in ``results_dir``.

    Parameters
    ----------
    col : str
        Key of ``reame_dict``; the renamed column it maps to is drawn on the y axis.
    is_log_yscale : bool
        Switch the y axis to logarithmic scale.
    ci : float or None
        Confidence-interval size forwarded to ``sns.catplot``.
        NOTE(review): newer seaborn releases replace ``ci`` with ``errorbar`` —
        confirm against the installed version before upgrading.
    ylim : bool
        When true, cap the y axis at 10**4.

    Notes
    -----
    Reads the notebook-level ``average_stats_all_with_groups``, ``x_label``,
    ``reame_dict`` and ``results_dir``.
    """
    y_cap = 10**4
    sns.catplot(data=average_stats_all_with_groups, x=x_label, y=reame_dict[col], kind="bar", ci=ci, height=8)
    if is_log_yscale:
        plt.yscale("log")
    if ylim:
        if is_log_yscale:
            # a log axis cannot start at 0; only cap the top and keep the automatic lower limit
            plt.ylim(top=y_cap)
        else:
            plt.ylim(0, y_cap)
    plt.savefig("%s/groups_%s.pdf" % (results_dir, col), format="pdf", bbox_inches='tight')

In [None]:
bar_plot_for_groups("failed_traffic_ratio", is_log_yscale=False)

In [None]:
bar_plot_for_groups("income_diff", ci=None)

In [None]:
bar_plot_for_groups("opt_delta", ci=None, ylim=True)

#### Mean original income for the selected categories

In [None]:
# Mean original income of each group; selecting the column as a Series makes .mean() return
# a scalar, avoiding float() on a one-element Series (deprecated since pandas 2.1).
[float(cat["total_income"].mean()) for cat in average_stats_all_cats]