In [8]:
import pandas as pd
import numpy as np
import networkx as nx
import scipy as sp

from deeplp.models.data_prep import create_weighted_graph, load_data
from deeplp.models.data_prep import prepare_data, random_unlabel, calc_masks
from deeplp.models.utils import accuracy, indices_to_vec
from deeplp.models.data_prep import select_features

from datasets import utils
from datasets.utils import listify, lisify_links, rbf, one_hot, get_u_to_b_indices, get_d_to_b_indices, get_r_to_b_indices
from datasets.utils import node_feature_similarities, node_feature_reduction, node_centralities, node_partitions, random_walk_features, node_others, edge_centralities, link_predictions, community_features

In [9]:
data = 'flickr'
true_labels, features, edge_features, node_features, graph = load_data(data,'flip',directed=1)
U,D,B,R,node_features = utils.load_data(data,'flip','datasets/')
edges = np.array(B.edges())
sources,sinks = edges[:,0],edges[:,1]


-----------flickr-----------
Loading labels...
Loading features...
Loading edge features...
Loading graph...
Done!


In [10]:
labeled_indices, unlabeled_indices = \
    random_unlabel(true_labels,0.99,features,
                   seed=0)

num_nodes, num_classes = true_labels.shape

labels, is_labeled = calc_masks(true_labels, labeled_indices, unlabeled_indices, logistic=0)


In [11]:
labels.shape,graph.shape

((7971, 7), (7971, 7971))

In [112]:
seed_to_node_lengths = []
for i in labeled_indices:
    shortest_paths_seed = nx.shortest_path_length(B,source=int(i))
    path_lengths = [i[1] for i in sorted(shortest_paths_seed.items())]
    seed_to_node_lengths.append(path_lengths)
seed_to_node_lengths = np.array(seed_to_node_lengths)
labels_for_seeds = np.argmax(true_labels[labeled_indices],axis=1)
labels_for_seeds_dict = {}
i=0
for i,label in enumerate(labels_for_seeds):
    if label in labels_for_seeds_dict:
        labels_for_seeds_dict[label].append(i)
    else:
        labels_for_seeds_dict[label] = [i]

seed_features = []
for label in labels_for_seeds_dict:
    indices = labels_for_seeds_dict[label]
    label_seed_to_node_lengths = seed_to_node_lengths[indices]
    label_min_len_to_seed = np.min(label_seed_to_node_lengths,axis=0)[sources]
    label_mean_len_to_seed = np.mean(label_seed_to_node_lengths,axis=0)[sources]
    seed_features.append(label_min_len_to_seed)
    seed_features.append(label_mean_len_to_seed)

min_len_to_seed = np.min(seed_to_node_lengths,axis=0)[sources]
mean_len_to_seed = np.mean(seed_to_node_lengths,axis=0)[sources]
seed_features.append(min_len_to_seed)
seed_features.append(mean_len_to_seed)
seed_features = np.array(seed_features).T
seed_features = utils.pos_normalize(seed_features)
edge_fetures = np.hstack([edge_features,seed_features])

In [116]:
edge_fetures = np.hstack([edge_features,seed_features])

In [117]:
edge_fetures.shape,seed_features.shape

((88648, 55), (88648, 8))

In [8]:
shortest_paths = np.ones(num_nodes) * np.inf
num_shortest_paths = np.ones(num_nodes) * np.inf
shortest_paths_seed = nx.shortest_path_length(B,source=labeled_indices)
# for target in range(num_nodes):
#     shortest_path = shortest_paths_seed[target]
#     shortest_paths[target] = shortest_path
#     num_shortest_path = len(list(nx.all_shortest_paths(B, labeled_indices, target, weight=None)))
#     num_shortest_paths[target] = num_shortest_path

NameError: name 'source' is not defined

In [11]:
for edge in B.edges:
    B.edges[edge[0],edge[1]]['capacity'] = 1
maximum_flow = np.ones(num_nodes) * np.inf
local_edge_connectivity = np.ones(num_nodes) * np.inf
for target in range(num_nodes):
    if target % 100 == 0:
        print(target)
    if target == source:
        maximum_flow[target] = 0
        local_edge_connectivity[target] = 0   
    else:
        maximum_flow[target] = nx.maximum_flow(B, source, target)[0]
        local_edge_connectivity[target] = nx.algorithms.connectivity.local_edge_connectivity(B, source, target)

0
100
200
300


KeyboardInterrupt: 

In [16]:
conductance = np.ones(num_nodes) * np.inf
cut_size = np.ones(num_nodes) * np.inf
normalized_cut_size = np.ones(num_nodes) * np.inf
for node in range(num_nodes):
    if node % 100 == 0:
        print(node)
    conductance[node] = nx.conductance(B, labeled_indices, [node])
    cut_size[node] = nx.cut_size(B, labeled_indices, [node])
    normalized_cut_size[node] = nx.normalized_cut_size(B, labeled_indices, [node])

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400


In [49]:
neighbors = np.array(nx.to_numpy_matrix(B)[source])[0]
conductance_sink_norm = conductance * neighbors / np.sum(conductance * neighbors)
cut_size_sink_norm = cut_size * neighbors / np.sum(cut_size * neighbors)
normalized_cut_size_sink_norm = normalized_cut_size * neighbors / np.sum(normalized_cut_size * neighbors)

In [58]:
seed_features = np.vstack((shortest_paths,num_shortest_paths,maximum_flow,local_edge_connectivity,conductance_sink_norm,cut_size_sink_norm,normalized_cut_size_sink_norm)).T

In [61]:
np.savetxt(f'seed_features/{data}/seed_features_{source}.csv',seed_features,delimiter=',')