In [2]:
%load_ext autoreload
%load_ext notexbook
%autoreload 2
%load_ext autotime

import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import pandas as pd
import glob
import seaborn as sns
import sys
sys.path.append("..")
from tqdm import tqdm
import networkx as nx

plt.style.use("../config/custom_plt.mplstyle")

from generate_incomplete_graph import *
from compute_precision_vs_missing_links import *

time: 1.3 s (started: 2023-12-15 15:29:08 +01:00)


In [3]:
from sklearn.preprocessing import LabelEncoder

def load_results(inf_coords_path):
    """Load an inferred-coordinates table from a whitespace-separated text file.

    Text following a ``#`` is treated as a comment and the file is read
    without a header row; the six columns are then named explicitly.

    Parameters
    ----------
    inf_coords_path : str or path-like
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``index``, ``kappa``, ``hyp_rad``, ``p1``, ``p2``
        and ``p3`` (in file column order).
    """
    # Raw string: "\s" inside a plain literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    inf_coords = pd.read_csv(inf_coords_path, comment="#", header=None, sep=r"\s+")
    inf_coords.columns = ['index', 'kappa', 'hyp_rad', 'p1', 'p2', 'p3']
    return inf_coords

time: 162 ms (started: 2023-12-15 15:29:09 +01:00)


In [6]:
from numba import jit


def read_beta_and_mu_and_R(path):
    """Extract the ``beta``, ``mu`` and ``R`` parameters from a text file.

    Every line is scanned for the markers ``'- beta'``, ``'- mu'`` and
    ``'- radius_S'`` (checked in that order, first match wins for a line);
    the last whitespace-separated token of a matching line is parsed as a
    float. If a marker occurs on several lines, the last occurrence is kept.

    Parameters
    ----------
    path : str or path-like
        File to scan.

    Returns
    -------
    tuple of float
        ``(beta, mu, R)``.

    Raises
    ------
    ValueError
        If one of the three parameters is not found in the file.
    """
    markers = (('- beta', 'beta'), ('- mu', 'mu'), ('- radius_S', 'R'))
    found = {}
    with open(path, 'r') as f:
        for line in f:
            for marker, name in markers:
                if marker in line:
                    found[name] = float(line.split()[-1])
                    break  # same precedence as an if/elif chain

    missing = [name for _, name in markers if name not in found]
    if missing:
        # Fail with an explicit message instead of an unbound-variable error.
        raise ValueError(f"{path}: could not find parameter(s) {', '.join(missing)}")
    return found['beta'], found['mu'], found['R']
    

@jit(nopython=True)
def fast_score_matrix(pos, kappas, beta, mu, R):
    """Compute the symmetric matrix of pairwise scores between nodes.

    For every pair ``i > j`` the angle between ``pos[i]`` and ``pos[j]`` is
    computed and the score is

        1 / (1 + (R * angle / sqrt(mu * kappas[i] * kappas[j])) ** beta)

    The diagonal is left at zero.

    Parameters
    ----------
    pos : 2-D array
        One position vector per row.
    kappas : 1-D array
        One value per node, in the same order as ``pos``.
    beta, mu, R : float
        Scalar parameters used in the formula above.

    Returns
    -------
    2-D float array of shape ``(len(kappas), len(kappas))``.
    """
    n = len(pos)
    score = np.zeros((len(kappas), len(kappas)))

    # Norms do not depend on the pair, so compute them once per node.
    norms = np.empty(n)
    for i in range(n):
        norms[i] = np.linalg.norm(pos[i])

    for i in range(n):
        for j in range(i):
            cos_angle = np.dot(pos[i], pos[j]) / (norms[j] * norms[i])
            # Round-off can push the cosine marginally outside [-1, 1], where
            # arccos returns NaN. Clamp it so that nearly antiparallel vectors
            # give an angle of pi rather than falling into the NaN fallback.
            if cos_angle > 1.0:
                cos_angle = 1.0
            elif cos_angle < -1.0:
                cos_angle = -1.0
            angle = np.arccos(cos_angle)
            if np.isnan(angle):
                # Fallback kept from the previous implementation.
                angle = 0.0
            score[i, j] = 1 / (1 + np.power((R * angle) / np.sqrt(mu * kappas[i] * kappas[j]), beta))
            score[j, i] = score[i, j]
    return score


def compute_score_matrix(g, df, beta, mu, R):
    """Build the pairwise score matrix with rows/columns ordered like ``g.nodes()``.

    Parameters
    ----------
    g : graph
        Object whose ``nodes()`` gives the node order of the result.
        Node labels are looked up among the stringified ``index`` column of
        ``df`` (presumably the labels are strings — verify against caller).
    df : pandas.DataFrame
        Must contain the columns ``index``, ``kappa``, ``p1``, ``p2``, ``p3``.
        It is not modified.
    beta, mu, R : float
        Forwarded unchanged to ``fast_score_matrix``.

    Returns
    -------
    The array returned by ``fast_score_matrix``.
    """
    nodes_order = list(g.nodes())
    # assign() returns a new frame, so the caller's DataFrame keeps its
    # original 'index' column instead of being overwritten in place.
    coords = (
        df.assign(index=df['index'].astype(str))
          .set_index('index')
          .loc[nodes_order]
    )

    pos = coords[['p1', 'p2', 'p3']].values
    kappas = coords['kappa'].values
    return fast_score_matrix(pos, kappas, beta, mu, R)


# INFO: Original implementation, kept for reference only (it calls nx.to_numpy_matrix, which was removed in NetworkX 3.0)
# def compute_precision_curve_incomplete_graph(qs, df_incomplete, complete_graph, incomplete_graph, beta, mu, R, ntimes=5):
#     score = compute_score_matrix(incomplete_graph, df_incomplete, beta, mu, R)

#     precisions = []
#     for q in tqdm(qs):
#         p_mean = []
#         for _ in range(ntimes):
#             train_graph = generate_incomplete_network(incomplete_graph, q0=q, check_gcc=False)
#             ET = nx.to_numpy_matrix(train_graph)
#             EP = nx.to_numpy_matrix(complete_graph) - ET

#             q0 = 0.1 # fraction of links removed from the original graph
#             L = int(q * nx.number_of_edges(incomplete_graph) / (1 - q0))
#             p = compute_precision(ET, EP, L, score)
#             p_mean.append(p)
#         precisions.append(np.mean(p_mean))
#     return precisions


def compute_precision_curve_incomplete_graph(qs, df_incomplete, complete_graph, incomplete_graph, beta, mu, R, ntimes=5, q0=0.1):
    """Compute one precision value per entry of ``qs``.

    Parameters
    ----------
    qs : iterable of float
        Values of ``q``; each one sets ``L = int(q * E / (1 - q0))`` where
        ``E`` is the number of edges of ``incomplete_graph``.
    df_incomplete : pandas.DataFrame
        Coordinates table passed to ``compute_score_matrix``.
    complete_graph, incomplete_graph : networkx graphs
        ``ET`` is the adjacency matrix of ``incomplete_graph`` and ``EP`` is
        the adjacency matrix of ``complete_graph`` minus ``ET``.
    beta, mu, R : float
        Forwarded to ``compute_score_matrix``.
    ntimes : int, default 5
        Number of ``compute_precision`` evaluations averaged per ``q``.
    q0 : float, default 0.1
        Fraction of links removed from the original graph.

    Returns
    -------
    list of float
        Mean precision for each ``q``, in the order of ``qs``.
    """
    score = compute_score_matrix(incomplete_graph, df_incomplete, beta, mu, R)

    # The score matrix follows incomplete_graph's node order, so both
    # adjacency matrices must use that same order; without an explicit
    # nodelist each graph would use its own insertion order and the
    # subtraction below would compare unrelated rows/columns.
    nodes_order = list(incomplete_graph.nodes())
    ET = nx.to_numpy_array(incomplete_graph, nodelist=nodes_order)
    EP = nx.to_numpy_array(complete_graph, nodelist=nodes_order) - ET
    n_edges = nx.number_of_edges(incomplete_graph)
    # NOTE(review): ET/EP are built once and reused for every call below;
    # this assumes compute_precision does not modify them in place — confirm.

    precisions = []
    for q in tqdm(qs):
        L = int(q * n_edges / (1 - q0))
        p_mean = [compute_precision(ET, EP, L, score) for _ in range(ntimes)]
        precisions.append(np.mean(p_mean))
    return precisions


def get_precision_curve_all(qs, path, complete_graph, ntimes=5):
    all_pr_le_ml, all_pr_umap_ml, all_pr_only_umap = [], [], []
    for p in glob.glob(f"{path}/umap*/"):
        incomplete_graph = nx.read_edgelist(glob.glob(f"{p}/le_ml/*.edge")[0])
        path_le_ml = glob.glob(f"{p}/le_ml/*.inf_coord")[0]
        beta_le_ml, mu_le_ml, R_le_ml = read_beta_and_mu_and_R(path_le_ml)
        df_le_ml_incomplete = load_results(path_le_ml)
        
        path_umap_ml = glob.glob(f"{p}/umap_ml/*.inf_coord")[0]
        beta_umap_ml, mu_umap_ml, R_umap_ml = read_beta_and_mu_and_R(path_umap_ml)
        df_umap_ml_incomplete = load_results(path_umap_ml)
        
        path_only_umap = glob.glob(f"{p}/only_umap/*.inf_coord")[0]
        beta_only_umap, mu_only_umap, R_only_umap = read_beta_and_mu_and_R(path_only_umap)
        df_only_umap_incomplete = load_results(path_only_umap)
        
        pr_le_ml  = compute_precision_curve_incomplete_graph(
            qs, df_le_ml_incomplete, complete_graph, incomplete_graph, beta_le_ml, mu_le_ml, R_le_ml, ntimes)
        pr_umap_ml  = compute_precision_curve_incomplete_graph(
            qs, df_umap_ml_incomplete, complete_graph, incomplete_graph, beta_umap_ml, mu_umap_ml, R_umap_ml, ntimes)
        pr_only_umap  = compute_precision_curve_incomplete_graph(
            qs, df_only_umap_incomplete, complete_graph, incomplete_graph, beta_only_umap, mu_only_umap, R_only_umap, ntimes)
        
        all_pr_le_ml.append(pr_le_ml)
        all_pr_umap_ml.append(pr_umap_ml)
        all_pr_only_umap.append(pr_only_umap)
    return np.array(all_pr_le_ml), np.array(all_pr_umap_ml), np.array(all_pr_only_umap)    

# Grid of 20 evenly spaced q values, from 0.01 to 0.8 inclusive.
qs = np.linspace(start=0.01, stop=0.8, num=20)

time: 3.15 ms (started: 2023-12-15 15:36:41 +01:00)


In [None]:
# Root folder of the amazon_photo dataset; edit this single line to relocate the data.
amazon_photo_dir = "/home/rob/MEGAsync/datasets/networks/machine_learning_datasets/f_mercator/amazon_photo"

# Reference graph against which the incomplete runs are compared.
amazon_photo_complete_graph = nx.read_edgelist(f"{amazon_photo_dir}/umap/le_ml/amazon_photo_GC.edge")
path = f"{amazon_photo_dir}/incomplete/"
# WARNING: long-running — the recorded progress bars show hours per curve.
amazon_photo_le_ml_pr, amazon_photo_umap_ml_pr, amazon_photo_only_umap_pr = get_precision_curve_all(qs, path, amazon_photo_complete_graph, ntimes=1)


100%|██████████| 20/20 [41:38:03<00:00, 7494.16s/it]    
100%|██████████| 20/20 [3:45:53<00:00, 677.66s/it]  
100%|██████████| 20/20 [3:43:41<00:00, 671.08s/it]  
100%|██████████| 20/20 [3:39:18<00:00, 657.93s/it]  
100%|██████████| 20/20 [15:29:57<00:00, 2789.89s/it]    
100%|██████████| 20/20 [5:02:45<00:00, 908.28s/it]   
 85%|████████▌ | 17/20 [18:16:46<1:03:16, 1265.38s/it] 

In [None]:
def plot_precision_curve_with_error(qs, df_le_ml, df_umap_ml, df_only_umap, title):
    """Draw the mean precision curve of each method with a +/- one std band.

    Each of the three inputs is averaged over axis 0 and plotted against
    ``qs`` on the current matplotlib axes; a translucent band spans the mean
    minus/plus the standard deviation over axis 0. Finally ``title`` is set
    as the axes title.
    """
    # (data, line format, legend label, colour) for every curve, in draw order.
    series = (
        (df_le_ml, 'o--', 'LE+ML', 'xkcd:red'),
        (df_umap_ml, 'v--', 'UMAP+ML', 'xkcd:blue'),
        (df_only_umap, 'v--', 'UMAP', 'xkcd:green'),
    )
    for values, fmt, label, color in series:
        center = np.mean(values, axis=0)
        spread = np.std(values, axis=0)
        plt.plot(qs, center, fmt, label=label, color=color)
        plt.fill_between(qs, center - spread, center + spread, color=color, alpha=0.1)
    plt.title(title)
    

    
plot_precision_curve_with_error(qs, amazon_photo_le_ml_pr, amazon_photo_umap_ml_pr, amazon_photo_only_umap_pr, 'amazon_photo')

plt.xlabel('Fraction of missing links')
plt.ylabel('Precision')
# The curves are drawn with labels; without a legend they cannot be told apart.
plt.legend()
plt.tight_layout()
