In [None]:
import json
import pandas as pd
import networkx as nx
import seaborn as sns

In [None]:
%matplotlib inline

# LN data

data_file = "../LNdata/0314.json"

In [None]:
# Columns kept from each node record ("is_reachable" is currently left out).
node_keys = [
    "pub_key",
    "last_update",
]

In [None]:
# Columns kept from each edge record.
edge_keys = [
    "node1_pub",
    "node2_pub",
    "last_update",
    "capacity",
    "channel_id",
]

In [None]:
# Path of the graph snapshot analysed in the first part of the notebook.
# NOTE(review): the numeric file name looks like a Unix timestamp of the capture — confirm.
data_file = "../LNdata/lncaptures/lngraph/2019/1549032366.json"

In [None]:
# Parse the snapshot into a plain Python object.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale encoding.
with open(data_file, encoding="utf-8") as f:
    ln_json = json.load(f)

In [None]:
# Top-level keys of the loaded snapshot.
ln_json.keys()

In [None]:
# First entry under "nodes", to see which fields a node record carries.
ln_json["nodes"][0]

In [None]:
# First entry under "edges", to see which fields an edge record carries.
ln_json["edges"][0]

## Other top-level keys (`chains`, `testnet`) - meaning unclear

ln_json["chains"]

ln_json["testnet"]

# LN Network

## nodes

In [None]:
nodes = pd.DataFrame(ln_json["nodes"])[node_keys]
len(nodes)

In [None]:
nodes = nodes[nodes["last_update"] > 0]
len(nodes)

In [None]:
nodes.head()

nodes["is_reachable"].value_counts()

In [None]:
# Distribution of the nodes' last_update values, in 50 bins.
nodes["last_update"].hist(bins=50)

## edges

In [None]:
edges = pd.DataFrame(ln_json["edges"])[edge_keys]
len(edges)

In [None]:
edges.head()

## NOT TOO MANY capacity changes :(

In [None]:
edges.groupby(["node1_pub","node2_pub"])["capacity"].nunique().value_counts()

In [None]:
edges["capacity"].value_counts()[:10]

## There are node pairs that have several channels between them!

In [None]:
edges.groupby(["node1_pub","node2_pub"])["channel_id"].nunique().value_counts()

In [None]:
edges.groupby(["node1_pub","node2_pub","channel_id","capacity"])["last_update"].nunique().value_counts()

## Direction - NO BIDIRECTIONALITY IN THE NETWORK!!!

In [None]:
node_pairs = edges[["node1_pub","node2_pub"]].drop_duplicates()
node_pairs.shape

In [None]:
node_pairs_reverse = node_pairs[["node2_pub","node1_pub"]]
node_pairs_reverse.columns = ["node1_pub","node2_pub"]

In [None]:
node_pairs.merge(node_pairs_reverse, on=["node1_pub","node2_pub"]).shape

## Build graph

In [None]:
# Build the graph from the edge table, carrying last_update and capacity as
# edge attributes.
# NOTE(review): with the default create_using this is a simple undirected
# nx.Graph, so parallel channels between the same two nodes presumably
# collapse into one edge — confirm this is intended.
G = nx.from_pandas_edgelist(
    edges,
    source="node1_pub",
    target="node2_pub",
    edge_attr=["last_update", "capacity"],
)

In [None]:
# Basic size statistics of the graph G.
# nx.number_of_selfloops(G) is used instead of the Graph.number_of_selfloops()
# method, which was removed in NetworkX 2.4.
print(
    "Nodes:", G.number_of_nodes(),
    "\nEdges:", G.number_of_edges(),
    "\nLoops:", nx.number_of_selfloops(G),
    "\nConnected components:", nx.number_connected_components(G),
)

In [None]:
# Giant connected component: the subgraph induced by the largest connected
# component of G (largest by node count).
# nx.connected_component_subgraphs() was removed in NetworkX 2.4; .copy()
# keeps gcc an independent graph, as that function returned by default.
gcc = G.subgraph(max(nx.connected_components(G), key=len)).copy()

In [None]:
# Size of the giant component. The diameter is left disabled:
#"\nDiameter:", nx.diameter(gcc)
print("Nodes:", gcc.number_of_nodes(), "\nEdges:", gcc.number_of_edges())

In [None]:
# Mapping from node to its degree in G.
degrees = dict(G.degree())

In [None]:
# Attach each node's degree in G; nodes that do not appear in `degrees` get 0.
# assign() returns a new frame, so the column is not written into a filtered
# slice of an earlier frame (avoids pandas' SettingWithCopyWarning), and the
# cell stays idempotent when re-run.
nodes = nodes.assign(
    degree=nodes["pub_key"].apply(lambda pub_key: degrees.get(pub_key, 0))
)

### Latest nodes

In [None]:
(nodes["last_update"].max() - nodes["last_update"].min()) // 86400

In [None]:
max_time = nodes["last_update"].max()

In [None]:
recent_nodes = nodes[nodes["last_update"] > max_time - 7*86400]

In [None]:
len(recent_nodes)

In [None]:
# Joint distribution of last_update and degree for the recently updated nodes.
sns.jointplot(data=recent_nodes, x="last_update", y="degree")

# Edge deletions

In [None]:
import os
from ln_utils import *

In [None]:
# The two snapshot files compared in this section.
# NOTE(review): the numeric file names look like Unix capture timestamps — confirm.
f1 = "../LNdata/lncaptures/lngraph/2019/1549032366.json"
f2 = "../LNdata/lncaptures/lngraph/2019/1549497602.json"

In [None]:
# Difference between the two file-name numbers in whole days (86400 seconds per day).
(1549497602 - 1549032366) // 86400

In [None]:
# Edge columns requested from the loader, including both policy fields.
EDGE_KEYS = [
    "node1_pub",
    "node2_pub",
    "last_update",
    "capacity",
    "channel_id",
    "node1_policy",
    "node2_policy",
]
# Load each snapshot on its own, f1 first and then f2.
(nodes1, edges1), (nodes2, edges2) = (
    load_temp_data([snapshot_path], edge_keys=EDGE_KEYS)
    for snapshot_path in (f1, f2)
)

# This is how we can observe edge deletions!!!

   * only the existence in the daily snapshots matters!

In [None]:
n1 = set(nodes1["pub_key"])
n2 = set(nodes2["pub_key"])

In [None]:
len(n1.difference(n2)), len(n2.difference(n1))

In [None]:
e1 = set(edges1["channel_id"])
e2 = set(edges2["channel_id"])

In [None]:
len(e1.difference(e2)), len(e2.difference(e1))

## Why are there multiple payment channels between nodes? Are there different policies on these edges?

In [None]:
chan_cnt = edges1.groupby(["node1_pub","node2_pub"])["channel_id"].nunique()#.value_counts()

In [None]:
pub1, pub2 = chan_cnt[chan_cnt==3].index[0]

In [None]:
df = edges1[(edges1["node1_pub"]==pub1) & (edges1["node2_pub"]==pub2)]
df

In [None]:
df["node1_policy"].loc[2835]

In [None]:
df["node1_policy"].loc[6758]

In [None]:
df["node1_policy"].loc[19252]

In [None]:
# Timestamps noted for reference together with their human-readable dates;
# the cell only evaluates the bare integer literals.
# NOTE(review): presumably values taken from the three channels inspected above — confirm.
1548971388 #January 31, 2019 9:49:48 PM # high cap
1548989388 #February 1, 2019 2:49:48 AM # low cap
1548989388 #February 1, 2019 2:49:48 AM # low cap

In [None]:
#print(chan_cnt[chan_cnt==3].reset_index().loc[0]["node_1_pub"])
#print(chan_cnt[chan_cnt==3].reset_index().loc[0]["node_2_pub"])