In [None]:
import datetime
from pathlib import Path

import networkx as nx
import network_diffusion as nd
import pandas as pd

from misc import net_loader

In [None]:
def filter_df(df, start=datetime.datetime(2009, 1, 1), end=datetime.datetime(2009, 4, 1)):
    """Keep only the rows of ``df`` whose ``date`` falls inside ``[start, end)``.

    The defaults select the complete first quarter of 2009. The window is
    half-open: a row stamped exactly at ``start`` is kept, a row stamped
    exactly at ``end`` is dropped, so every moment of 31 March is included.

    Args:
        df: frame with a datetime-like ``date`` column.
        start: inclusive lower bound of the window.
        end: exclusive upper bound of the window.

    Returns:
        The rows of ``df`` inside the window, with their original index labels.
    """
    dates = df["date"]
    in_window = (dates >= start) & (dates < end)
    return df[in_window]

## Load nets

In [None]:
# Directory holding the raw, semicolon-separated Timik CSV dumps.
base_path = Path("/workspace/shared/ns/timik_dataset")

# These two layers are loaded below but skipped when the network is exported.
campaigns_path = base_path.joinpath("campaigns.csv")
friends_path = base_path.joinpath("friends.csv")

# These three layers make up the exported network.
messages_path = base_path.joinpath("messages.csv")
transactions_path = base_path.joinpath("transactions.csv")
visits_path = base_path.joinpath("visits.csv")

# logins.csv is deliberately not loaded: it contains no edges.

In [None]:
# Campaigns layer. The file is read as headerless and semicolon-separated, so
# columns are addressed by position; the timestamp sits in column 1 here.
campaigns_dtypes = {0: "str", 1: "str", 2: "str", 3: "str"}
campaigns_df = (
    pd.read_csv(
        campaigns_path,
        sep=';',
        header=None,
        dtype=campaigns_dtypes,
        parse_dates=[1],
    )
    # The first column was previously misspelled as "capmaign_id".
    .rename(columns={0: "campaign_id", 1: "date", 2: "source", 3: "target"})
    .assign(layer="campaign")
)
# Restrict the layer to the analysed time window.
campaigns_dff = filter_df(campaigns_df)

# Row counts before and after filtering, then a preview of the kept rows.
print(len(campaigns_df), len(campaigns_dff))
campaigns_dff.head()

In [None]:
# Graph of the filtered campaign edges; show its node count and number of connected components.
c = nx.from_pandas_edgelist(campaigns_dff, source="source", target="target")
c.number_of_nodes(), nx.number_connected_components(c)

In [None]:
# Friends layer. The file is read as headerless and semicolon-separated, so
# columns are addressed by position; the timestamp sits in column 0.
friends_dtypes = dict.fromkeys(range(3), "str")
friends_df = (
    pd.read_csv(
        friends_path,
        sep=';',
        header=None,
        dtype=friends_dtypes,
        parse_dates=[0],
    )
    .rename(columns={0: "date", 1: "source", 2: "target"})
    .assign(layer="friends")
)
# Restrict the layer to the analysed time window.
friends_dff = filter_df(friends_df)

# Row counts before and after filtering, then a preview of the kept rows.
print(len(friends_df), len(friends_dff))
friends_dff.head()

In [None]:
# Graph of the filtered friends edges; show its node count and number of connected components.
f = nx.from_pandas_edgelist(friends_dff, source="source", target="target")
f.number_of_nodes(), nx.number_connected_components(f)

In [None]:
# Messages layer. The file is read as headerless and semicolon-separated, so
# columns are addressed by position; the timestamp sits in column 0.
messages_dtypes = dict.fromkeys(range(3), "str")
messages_df = (
    pd.read_csv(
        messages_path,
        sep=';',
        header=None,
        dtype=messages_dtypes,
        parse_dates=[0],
    )
    .rename(columns={0: "date", 1: "source", 2: "target"})
    .assign(layer="messages")
)
# Restrict the layer to the analysed time window.
messages_dff = filter_df(messages_df)

# Row counts before and after filtering, then a preview of the kept rows.
print(len(messages_df), len(messages_dff))
messages_dff.head()

In [None]:
# Graph of the filtered message edges; show its node count and number of connected components.
m = nx.from_pandas_edgelist(messages_dff, source="source", target="target")
m.number_of_nodes(), nx.number_connected_components(m)

In [None]:
# Transactions layer. The file is read as headerless and semicolon-separated,
# so columns are addressed by position; the timestamp sits in column 0 and the
# fourth column is kept as a string "amount".
transactions_dtypes = dict.fromkeys(range(4), "str")
transactions_df = (
    pd.read_csv(
        transactions_path,
        sep=';',
        header=None,
        dtype=transactions_dtypes,
        parse_dates=[0],
    )
    .rename(columns={0: "date", 1: "source", 2: "target", 3: "amount"})
    .assign(layer="transactions")
)
# Restrict the layer to the analysed time window.
transactions_dff = filter_df(transactions_df)

# Row counts before and after filtering, then a preview of the kept rows.
print(len(transactions_df), len(transactions_dff))
transactions_dff.head()

In [None]:
# Graph of the filtered transaction edges; show its node count and number of connected components.
t = nx.from_pandas_edgelist(transactions_dff, source="source", target="target")
t.number_of_nodes(), nx.number_connected_components(t)

In [None]:
# Visits layer. The file is read as headerless and semicolon-separated, so
# columns are addressed by position; the timestamp sits in column 0.
visits_dtypes = dict.fromkeys(range(3), "str")
visits_df = (
    pd.read_csv(
        visits_path,
        sep=';',
        header=None,
        dtype=visits_dtypes,
        parse_dates=[0],
    )
    .rename(columns={0: "date", 1: "source", 2: "target"})
    .assign(layer="visits")
)
# Restrict the layer to the analysed time window.
visits_dff = filter_df(visits_df)

# Row counts before and after filtering, then a preview of the kept rows.
print(len(visits_df), len(visits_dff))
visits_dff.head()

In [None]:
# Graph of the filtered visit edges; show its node count and number of connected components.
v = nx.from_pandas_edgelist(visits_dff, source="source", target="target")
v.number_of_nodes(), nx.number_connected_components(v)

## Summary of layers

In [None]:
# Layers kept for the final network: layer name -> [graph, filtered edge table].
# The campaigns and friends layers are intentionally left out.
nets = {
    "messages": [m, messages_dff],
    "transactions": [t, transactions_dff],
    "visits": [v, visits_dff],
}

In [None]:
def print_net(net):
    """Print the node count, edge count and density of the graph ``net``."""
    # The first label used to read "num nodes, "; it now matches the other two.
    print(f"num nodes: {len(net.nodes())}, num edges: {len(net.edges)}, density: {nx.density(net)}")

# Gather the node labels of every kept layer (duplicates across layers are
# retained here) while printing each layer's name and basic statistics.
actors = []
for layer_name, (graph, _) in nets.items():
    actors += list(graph.nodes())
    print(layer_name)
    print_net(graph)

In [None]:
# Totals across the kept layers. Nodes and edges are summed per layer, whereas
# "sum actors" counts each distinct node label once.
total_nodes = sum(graph.number_of_nodes() for graph, _ in nets.values())
total_edges = sum(graph.number_of_edges() for graph, _ in nets.values())
unique_actors = set(actors)

print("sum nodes: ", total_nodes)
print("sum edges: ", total_edges)
print("sum actors: ", len(unique_actors))

## Save layers

In [None]:
# Output directory for the exported layers, relative to the working directory;
# it is created together with any missing parents and reused if already present.
out_path = Path("data/networks/timik1q2009")
out_path.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    """Write ``df`` to ``<out_path>/<name>.csv`` with a fresh 0..n-1 row index.

    ``reset_index(drop=True)`` discards the old index directly. The earlier
    ``reset_index().drop("index", axis=1)`` raised ``KeyError`` when the index
    was named and, when the frame already had a data column called ``index``,
    dropped that column instead of the old index.

    Args:
        df: frame to export.
        name: file stem of the CSV created inside ``out_path``.
    """
    df.reset_index(drop=True).to_csv(out_path / f"{name}.csv")

def save_graph(net, name):
    """Export the edges of ``net`` to ``<out_path>/<name>.csv``.

    The file has two columns, ``source`` and ``target``, one row per edge;
    edge attributes and the row index are not written.
    """
    destination = out_path / f"{name}.csv"
    endpoints = [(head, tail) for head, tail in net.edges(data=False)]
    pd.DataFrame(endpoints, columns=['source', 'target']).to_csv(destination, index=False)

In [None]:
# Export the graph of every kept layer as <layer name>.csv; the accompanying
# edge tables are not written.
for layer_name, (graph, _) in nets.items():
    save_graph(graph, layer_name)

## Sanity check

In [None]:
# Re-read every exported CSV from disk and rebuild one graph per file.
layer_graphs = []
layer_names = []

# glob() yields files in filesystem-dependent order; sorting pins the layer
# order so the rebuilt network is the same on every run and machine.
# NOTE(review): node ids were loaded as strings above but are re-read here with
# pandas' inferred dtype - confirm downstream loaders expect the same type.
for csv_file in sorted(out_path.glob("*.csv")):
    layer_names.append(csv_file.stem)
    layer_graphs.append(nx.from_pandas_edgelist(pd.read_csv(csv_file)))

In [None]:
# Assemble the multilayer network from the graphs re-read from disk; each layer
# takes the stem of the CSV file it was read from as its name.
timik1q2009 = nd.MultilayerNetwork.from_nx_layers(
    layer_names=layer_names,
    network_list=layer_graphs,
)

In [None]:
# Print the string representation of the rebuilt network for a visual check.
print(timik1q2009)

## Reference networks

In [None]:
# Load the Cannes network through the project's net_loader helper and print its
# string representation (presumably for comparison with timik1q2009 - the
# helper itself is not visible here).
cannes = net_loader.get_cannes_network()
print(cannes)

In [None]:
# Load the arXiv network through the project's net_loader helper and print its
# string representation (presumably for comparison with timik1q2009 - the
# helper itself is not visible here).
arxiv = net_loader.get_arxiv_network()
print(arxiv)