In [1]:
import pandas as pd
import networkx as nx
import network_distance as nd
import matplotlib.pyplot as plt

import warnings
warnings.simplefilter(action = "ignore", category = FutureWarning)

In [2]:
# Load the node vectors. Comma-separated, 3 columns: node id, then two values per node.
vector_df = pd.read_csv("../data/obama_nodevectors.csv")

# Shift both vectors so that their minimum is zero; this keeps the results
# comparable with the ones from the previous exercise.
vector_df["X"] = vector_df["X"].sub(vector_df["X"].min())
vector_df["Y"] = vector_df["Y"].sub(vector_df["Y"].min())

# Convert each variable to a "node -> value" dict (one conversion, then pick the column).
node_values = vector_df.set_index("node").to_dict()
v1_dict = node_values["X"]
v2_dict = node_values["Y"]

# Read the comma-separated edge list (one edge per line, integer node ids)
# into a networkx undirected graph object.
G = nx.read_edgelist(
    "../data/obama_edgelist.csv",
    delimiter = ",",
    nodetype = int,
)

In [3]:
# Report nd.spl between the two "node -> value" dicts on G, once per linkage setting.
# The empty first and last arguments reproduce the blank lines framing the report.
print(
    "",
    f'Single linkage: {nd.spl(v1_dict, v2_dict, G, linkage = "single")};',
    f'Average linkage: {nd.spl(v1_dict, v2_dict, G, linkage = "avg")};',
    f'Complete linkage: {nd.spl(v1_dict, v2_dict, G, linkage = "complete")}.',
    "",
    sep = "\n",
)


Single linkage: 0.19261940695605487;
Average linkage: 2.492817629483678;
Complete linkage: 3.344816651914214.



In [4]:
# Report nd.emd between the two "node -> value" dicts on G, framed by blank lines.
print(f"\nEMD: {nd.emd(v1_dict, v2_dict, G)}.\n")


EMD: 0.17431584907100547.



In [5]:
# Report nd.gft between the two "node -> value" dicts on G, once per `linkage` option.
# NOTE(review): the recorded output shows the same value for "cosine" and "pearson" —
# confirm that this is expected for this data.
print(
    f"\nGFT Euclidean: {nd.gft(v1_dict, v2_dict, G, linkage = 'euclidean')};"
    f"\nGFT Cosine: {nd.gft(v1_dict, v2_dict, G, linkage = 'cosine')};"
    f"\nGFT Pearson: {nd.gft(v1_dict, v2_dict, G, linkage = 'pearson')}.\n"
)


GFT Euclidean: 0.34111852283972527;
GFT Cosine: 0.05347390684871789;
GFT Pearson: 0.05347390684871789.

