In [None]:
import json
import pandas as pd
import networkx as nx
import seaborn as sns

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# LN data

data_file = "../LNdata/0314.json"

In [None]:
# Node attributes kept from the snapshot ("is_reachable" is currently disabled).
node_keys = [
    "pub_key",
    "last_update",
    # "is_reachable",
]

In [None]:
# Edge attributes kept from the snapshot.
edge_keys = [
    "node1_pub",
    "node2_pub",
    "last_update",
    "capacity",
    "channel_id",
]

In [None]:
# Path of the graph snapshot (JSON) that the following cell loads.
# NOTE(review): the file name looks like a Unix timestamp — confirm.
data_file = "../LNdata/lncaptures/lngraph/2019/1549032366.json"

In [None]:
# Parse the snapshot file into a plain Python object.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default text encoding.
with open(data_file, encoding="utf-8") as f:
    ln_json = json.load(f)

In [None]:
# Show the top-level keys of the loaded snapshot.
ln_json.keys()

In [None]:
# Peek at the first node record to see which fields it carries.
ln_json["nodes"][0]

In [None]:
# Peek at the first edge record to see which fields it carries.
ln_json["edges"][0]

## Unclear fields (TODO: what do `chains` and `testnet` hold?)

ln_json["chains"]

ln_json["testnet"]

# LN Network

## nodes

In [None]:
# Tabulate the node records, then keep only the columns listed in node_keys.
nodes = pd.DataFrame(ln_json["nodes"])
nodes = nodes[node_keys]
nodes.shape[0]

In [None]:
# Keep only the nodes with a positive last_update value.
# .copy() detaches the filtered frame from the original, so the later
# `nodes["degree"] = ...` assignment does not raise SettingWithCopyWarning.
nodes = nodes[nodes["last_update"] > 0].copy()
len(nodes)

In [None]:
# Preview the first rows of the node table.
nodes.head()

nodes["is_reachable"].value_counts()

In [None]:
# Histogram of the nodes' last_update values, using 50 bins.
nodes["last_update"].hist(bins=50)

## edges

In [None]:
# Tabulate the edge records, then keep only the columns listed in edge_keys.
edges = pd.DataFrame(ln_json["edges"])
edges = edges[edge_keys]
edges.shape[0]

In [None]:
# Preview the first rows of the edge table.
edges.head()

## NOT TOO MANY capacity changes :(

In [None]:
# Count the distinct capacity values per (node1, node2) pair, then tabulate
# how many pairs have each count.
(
    edges
    .groupby(["node1_pub", "node2_pub"])["capacity"]
    .nunique()
    .value_counts()
)

In [None]:
# First ten entries of the capacity frequency table (most frequent first).
edges["capacity"].value_counts().head(10)

## There are node pairs that have several channels between them!

In [None]:
# Count the distinct channel ids per (node1, node2) pair, then tabulate how
# many pairs have each count.
(
    edges
    .groupby(["node1_pub", "node2_pub"])["channel_id"]
    .nunique()
    .value_counts()
)

In [None]:
# Count the distinct last_update values per (pair, channel, capacity)
# combination, then tabulate how many combinations have each count.
(
    edges
    .groupby(["node1_pub", "node2_pub", "channel_id", "capacity"])["last_update"]
    .nunique()
    .value_counts()
)

## Direction - NO BIDIRECTIONALITY IN THE NETWORK!!!

In [None]:
# Unique (node1, node2) endpoint pairs, ignoring every other edge column.
node_pairs = edges[["node1_pub", "node2_pub"]]
node_pairs = node_pairs.drop_duplicates()
node_pairs.shape

In [None]:
# The same pairs with the two endpoints swapped: node1_pub now holds the
# original node2_pub and vice versa, with the columns in the original order.
node_pairs_reverse = node_pairs.rename(
    columns={"node1_pub": "node2_pub", "node2_pub": "node1_pub"}
)[["node1_pub", "node2_pub"]]

In [None]:
# Inner-join the pairs with their swapped copies. The row count is the number
# of pairs whose opposite orientation is also present (a pair with identical
# endpoints would match itself).
pd.merge(
    node_pairs,
    node_pairs_reverse,
    on=["node1_pub", "node2_pub"],
).shape

## Build graph

In [None]:
# Build a graph from the edge table, attaching last_update and capacity as
# edge attributes.
# NOTE(review): with the default graph type, parallel channels between the
# same two nodes end up as a single edge — confirm this is intended.
G = nx.from_pandas_edgelist(
    edges,
    source="node1_pub",
    target="node2_pub",
    edge_attr=["last_update", "capacity"],
)

In [None]:
# Basic size statistics of the full graph.
# nx.number_of_selfloops(G) replaces the Graph.number_of_selfloops() method,
# which was removed from NetworkX (2.4+); the function form exists since 2.1.
print(
    "Nodes:", G.number_of_nodes(),
    "\nEdges:", G.number_of_edges(),
    "\nLoops:", nx.number_of_selfloops(G),
    "\nConnected components:", nx.number_connected_components(G),
)

In [None]:
# Giant connected component: the subgraph induced by the largest connected
# component of G. nx.connected_component_subgraphs() was removed from
# NetworkX (2.4+), so pick the largest node set and copy its induced subgraph
# (the removed helper also returned copies by default).
gcc = G.subgraph(max(nx.connected_components(G), key=len)).copy()

In [None]:
# Size of the largest connected component (gcc).
print(
"Nodes:", gcc.number_of_nodes(),
"\nEdges:", gcc.number_of_edges(),
# Diameter output is disabled (presumably because it is slow to compute — confirm):
#"\nDiameter:", nx.diameter(gcc)
)

In [None]:
# Map every graph node to its degree in G.
degrees = dict(G.degree())

In [None]:
# Attach each node's degree in G; nodes that do not appear in G get 0.
nodes["degree"] = [degrees.get(pub_key, 0) for pub_key in nodes["pub_key"]]

### Latest nodes

In [None]:
# Span between the oldest and newest last_update, in whole days.
# NOTE(review): assumes last_update is expressed in seconds — confirm.
(nodes["last_update"].max() - nodes["last_update"].min()) // (24 * 60 * 60)

In [None]:
# Largest last_update value among the kept nodes; used as the reference time below.
max_time = nodes["last_update"].max()

In [None]:
# Nodes whose last_update lies within 7 * 86400 units (7 days, if the values
# are in seconds) of the newest update.
recent_nodes = nodes.loc[nodes["last_update"] > max_time - 7 * 24 * 60 * 60]

In [None]:
# Number of recently updated nodes.
len(recent_nodes)

In [None]:
# Joint distribution of last_update versus degree for the recently updated nodes.
sns.jointplot(data=recent_nodes, x="last_update", y="degree")