In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

# When the working directory's path does not end in "TGB", step up one level.
# NOTE(review): presumably this makes relative paths such as "datasets" resolve
# from the repository root -- confirm the expected launch directory.
if not os.getcwd().endswith("TGB"):
    os.chdir('..')
print(os.getcwd())

from tgb.linkproppred.dataset import LinkPropPredDataset

def load_data(name):
    """Return the ``full_data`` attribute of the dataset called ``name``.

    The dataset object is built as
    ``LinkPropPredDataset(name=name, root="datasets", preprocess=True)``.
    """
    return LinkPropPredDataset(name=name, root="datasets", preprocess=True).full_data

def show_data(data, data_name="", ign_keys=None, save_path=None):
    """Print a textual summary of ``data`` and plot every attribute.

    Args:
        data: mapping from attribute name to array. Must contain 'sources',
            'destinations' and 'timestamps'; an 'edge_feat' entry, if shown,
            is indexed as a 2-D array.
        data_name: optional figure title; nothing is added when empty.
        ign_keys: attribute names to leave out of the attribute preview and
            the plots (their shapes are still printed). ``None`` means
            "ignore nothing".
        save_path: if not None, the figure is written to this path.

    Returns:
        None. Output is printed text plus a matplotlib figure with one row per
        shown attribute (histogram on the left, line plot on the right).
    """
    # None replaces the former mutable ``[]`` default; the argument used to be
    # accepted but never consulted.
    skipped = set(ign_keys) if ign_keys is not None else set()
    shown_keys = [key for key in data.keys() if key not in skipped]

    print("=============== Data shapes ================")
    print("Length of data: ", len(data))
    endpoints = np.hstack((data['sources'], data['destinations']))
    print("Number of unique nodes: ", len(np.unique(endpoints)))
    print("Number of edges: ", len(data['sources']))
    print("Number of unique timestamps: ", len(np.unique(data['timestamps'])))
    print()
    for key in data.keys():
        print(key, data[key].shape)

    print("============== Preview of attributes =================")
    for key in shown_keys:
        if key == 'edge_feat':
            # First six features of the first edge, then the feature count.
            print(key, data[key][0][:6])
            # The label used to be "\feats:", where "\f" is a form-feed escape.
            print("\tfeats:", data[key].shape[1])
        else:
            print(key, data[key][:10])

    if not shown_keys:
        # plt.subplots cannot build a grid with zero rows.
        return

    # squeeze=False keeps ``axs`` two-dimensional even with a single row.
    fig, axs = plt.subplots(len(shown_keys), 2, figsize=(20, 25), squeeze=False)
    if data_name:
        fig.suptitle(data_name)
    for row, key in enumerate(shown_keys):
        plot_data = data[key]
        if key == 'edge_feat':
            plot_data = plot_data.flatten()
        axs[row][0].set_title("histogram of " + key)
        axs[row][0].hist(plot_data, bins=100)

        axs[row][1].set_title("plot of " + key)
        axs[row][1].plot(plot_data)

    # Save before showing: some backends release the figure once it is shown.
    if save_path is not None:
        fig.savefig(save_path)
    plt.show()

def calculate_density(data):
    """Print the edge density of the network in ``data`` as a percentage.

    The density is ``E / (N * (N - 1)) * 100`` where ``E`` is the number of
    entries in ``data['sources']`` (repeated pairs count every time) and ``N``
    is the number of distinct ids found in ``data['sources']`` or
    ``data['destinations']``.

    With fewer than two distinct nodes the denominator is zero, so the density
    is reported as 0 instead of raising ZeroDivisionError.

    Returns:
        None; the result is printed with three decimals.
    """
    sources = data['sources']
    destinations = data['destinations']

    # Count distinct endpoints with np.unique rather than building Python sets
    # element by element.
    N = len(np.unique(np.hstack((sources, destinations))))

    # Total number of edges
    E = len(sources)

    average_density = E / (N * (N - 1)) * 100 if N > 1 else 0.0

    print(f"Average Density of the Network: {average_density:.3f}%")

In [None]:
def density(N,E):
    """Return the density, in percent, of a network with N nodes and E edges.

    Computed as ``E / (N * (N - 1)) * 100``. When ``N`` is below 2 the
    denominator would be zero, so 0.0 is returned instead of raising
    ZeroDivisionError.
    """
    if N < 2:
        return 0.0
    return E / (N * (N - 1)) * 100

In [None]:
# `data_wiki` is first assigned in the "Wiki" section further down, so on a
# fresh kernel this cell raised NameError. Load the dataset here so the
# notebook runs top to bottom.
data_wiki = load_data("tgbl-wiki")
calculate_density(data_wiki)

In [None]:
# Density for hard-coded counts: 18143 nodes and 67169570 edges.
# NOTE(review): these look like the tgbl-flight totals -- confirm the source.
density(N=18143, E=67169570)

# Wiki

In [None]:
# Load the full data of the "tgbl-wiki" dataset.
data_wiki = load_data(name="tgbl-wiki")

In [None]:
# Print the summary and draw the per-attribute plots for tgbl-wiki.
show_data(data=data_wiki)

In [None]:
# Display the attribute names available in the loaded tgbl-wiki data.
data_wiki.keys()

In [None]:
# Print the network density (in percent) for tgbl-wiki.
calculate_density(data=data_wiki)

# Review

In [None]:
# Load the full data of the "tgbl-review" dataset.
data_review = load_data(name="tgbl-review")

In [None]:
# Summarise and plot tgbl-review, passing 'edge_feat' as the key to ignore.
show_data(data=data_review, ign_keys=["edge_feat"])

In [None]:
# Print the network density (in percent) for tgbl-review.
calculate_density(data=data_review)

# Flight

In [None]:
# Load the full data of the "tgbl-flight" dataset.
data_flight = load_data(name="tgbl-flight")

In [None]:
# Summarise and plot tgbl-flight, passing 'edge_feat' as the key to ignore.
show_data(data=data_flight, ign_keys=["edge_feat"])

In [None]:
# Print the network density (in percent) for tgbl-flight.
calculate_density(data=data_flight)

# Coin

In [None]:
# Load the full data of the "tgbl-coin" dataset.
data_coin = load_data(name="tgbl-coin")

In [None]:
# Summarise and plot tgbl-coin, passing 'edge_feat' as the key to ignore.
show_data(data=data_coin, ign_keys=["edge_feat"])

In [None]:
# Print the network density (in percent) for tgbl-coin.
calculate_density(data=data_coin)

# Comment

In [None]:
# Load the full data of the "tgbl-comment" dataset.
data_comment = load_data(name="tgbl-comment")

In [None]:
# Summarise and plot tgbl-comment, passing 'edge_feat' as the key to ignore.
show_data(data=data_comment, ign_keys=["edge_feat"])

In [None]:
# Print the network density (in percent) for tgbl-comment.
calculate_density(data=data_comment)