In [1]:
from localgraphclustering import *

import time
import numpy as np

# Load graph

In [2]:
# Build the graph object from an edge-list file; '\t' is passed as the third
# argument (presumably the column separator -- confirm against GraphLocal).
# GraphLocal also supports the gml format.
g = graph_class_local.GraphLocal('JohnsHopkins.edgelist', 'edgelist', '\t')

# Spectral global graph partitioning

In [3]:
# Create an object for global spectral partitioning.
sp = spectral_partitioning.Spectral_partitioning()

# Run global spectral partitioning on the graph loaded above. The graph is
# passed wrapped in a list; the result is kept in `output_sp`.
output_sp = sp.produce([g])

# Create objects for spectral local graph partitioning methods

In [4]:
# The objects below are only constructed here; the algorithms themselves are
# run in later cells through each object's .produce(...) method.

# Create an object for the approximate PageRank algorithm.
pr = approximate_PageRank.Approximate_PageRank()
# Create an object for the C++ version of the approximate PageRank algorithm.
pr_fast = approximate_PageRank_fast.Approximate_PageRank_fast()
# Create an object for a weighted approximate PageRank algorithm.
pr_weighted_fast = approximate_PageRank_weighted_fast.Approximate_PageRank_weighted_fast()
# Create an object for an L1-regularized PageRank solver.
l1reg = l1_regularized_PageRank.L1_regularized_PageRank()
# Create an object for an accelerated L1-regularized PageRank solver.
l1reg_fast = l1_regularized_PageRank_fast.L1_regularized_PageRank_fast()
# Create an object for the PageRank Nibble algorithm.
pr_nb = pageRank_nibble.PageRank_nibble()

# Create objects for rounding methods

In [5]:
# Rounding objects: in a later cell each one is given the graph and a vector
# `p` via .produce([g], p=...).

# Create an object for the sweep cut rounding procedure.
sc = sweepCut_general.SweepCut_general()
# Create an object for a C++ version of the sweep cut rounding procedure.
sc_fast = sweepCut_fast.SweepCut_fast()
# Create an object for the degree-normalized sweep cut rounding procedure.
sc_normalized = sweepCut_normalized.SweepCut_normalized()

# Create an object for multiclass label prediction.

In [6]:
# Create an object for multiclass label prediction; it is run later with
# mc.produce([g], labels=...).
mc = multiclass_label_prediction.Multiclass_label_prediction()

# Example: produce embeddings using l1-regularized PageRank

In [7]:
# Solve L1-regularized PageRank with localized accelerated gradient descent.
# According to the notebook authors this is the fastest of the l1-regularized
# and approximate PageRank solvers shown here.
# The second argument is a list holding node 3215 (presumably the seed /
# reference node -- confirm against the solver's documentation).
output_l1reg_fast = l1reg_fast.produce([g], [3215])

# More examples

In [8]:
# Run each remaining PageRank-style solver on the graph, passing [0] as the
# second argument (presumably the list of seed nodes -- confirm against the
# solvers' documentation).

# Find an approximate PageRank vector.
output_pr = pr.produce([g], [0])
# Find an approximate PageRank vector by calling a C++ code.
output_pr_fast = pr_fast.produce([g], [0])
# Find a weighted approximate PageRank vector.
# Uses the weighted solver object created earlier (`pr_weighted_fast`); the
# previous version called `pr_fast` here, which merely repeated the
# unweighted computation above.
output_pr_weighted_fast = pr_weighted_fast.produce([g], [0])
# Find the solution of L1-regularized PageRank.
output_l1reg = l1reg.produce([g], [0])
# Call the PageRank nibble algorithm.
output_pr_nb = pr_nb.produce([g], [0])

# Examples for rounding algorithms

In [9]:
# Round the l1-regularized PageRank solution computed earlier. Each rounding
# object is given the same input: the first element of `output_l1reg_fast`.

# General sweep cut rounding.
output_sc = sc.produce([g], p=output_l1reg_fast[0])
# C++ version of sweep cut rounding.
output_sc_fast = sc_fast.produce([g], p=output_l1reg_fast[0])
# Degree-normalized sweep cut rounding.
output_sc_normalized = sc_normalized.produce([g], p=output_l1reg_fast[0])

# Example for multiclass label prediction

In [10]:
# Lists of nodes around which we want to find labels (three groups here;
# presumably one group of labelled nodes per class -- confirm against the
# function's documentation).
labels = [[1, 10, 300], [3215], [1002, 2500, 540]]

# Predict labels for the unlabelled nodes. For details refer to:
#   D. Gleich and M. Mahoney.
#   Using Local Spectral Methods to Robustify Graph-Based Learning Algorithms.
#   SIGKDD 2015.
#   https://www.stat.berkeley.edu/~mmahoney/pubs/robustifying-kdd15.pdf
# To check the documentation for the input and output of this function, call
# help(multiclass_label_prediction)
output_mc = mc.produce([g], labels=labels)

# Network Community Profile

In [11]:
# Compute the network community profile (NCP) of the current graph and print
# the elapsed time in seconds.
ncp_instance = ncp.Ncp()
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
# The positional 1 is presumably the `ratio` argument -- confirm against
# Ncp.produce. 20 worker threads with a 1000-unit NCP timeout are requested.
output_ncp = ncp_instance.produce([g], 1, timeout_ncp=1000, nthreads=20, multi_threads=True)
end = time.perf_counter()
print(end - start)

There are  1  connected components in the graph


Exception in thread Thread-17:
Traceback (most recent call last):
  File "/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.6/site-packages/localgraphclustering/ncp_algo.py", line 49, in worker
    output_sc_fast = sc_fast.produce([g_copy],p=output_l1reg_fast[0])
  File "/usr/local/lib/python3.6/site-packages/localgraphclustering/sweepCut_fast.py", line 75, in produce
    input.adjacency_matrix.data, nnz_idx, nnz_ct, sc_p, 1 - do_sort)
  File "/usr/local/lib/python3.6/site-packages/localgraphclustering/sweepcut_cpp.py", line 78, in sweepcut_cpp
    actual_results[:]=[results[i] for i in range(actual_length)]
  File "/usr/local/lib/python3.6/site-packages/localgraphclustering/sweepcut_cpp.py

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-8b96810affb3>", line 3, in <module>
    output_ncp = ncp_instance.produce([g],1,timeout_ncp = 1000,nthreads = 20,multi_threads = True)
  File "/usr/local/lib/python3.6/site-packages/localgraphclustering/ncp.py", line 79, in produce
    return [ncp_algo(inputs[i], ratio=ratio, timeout=timeout, timeout_ncp=timeout_ncp, iterations=iterations, epsilon=epsilon, nthreads=nthreads, multi_threads=multi_threads) for i in range(len(inputs))]
  File "/usr/local/lib/python3.6/site-packages/localgraphclustering/ncp.py", line 79, in <listcomp>
    return [ncp_algo(inputs[i], ratio=ratio, timeout=timeout, timeout_ncp=timeout_ncp, iterations=iterations, epsilon=epsilon, nthreads=nthreads, multi_threads=multi_threads) for i in range(len(inputs))]
  File "/usr/local/lib/python3.6/site-pa

KeyboardInterrupt: 

In [None]:
# Repeat the NCP computation on a different graph. Note that this rebinds the
# notebook-global `g`, so later cells see the Erdos02 graph unless they reload.
g = graph_class_local.GraphLocal('Erdos02-cc.edgelist', 'edgelist', ' ')
ncp_instance = ncp.Ncp()
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
# The positional 1 is presumably the `ratio` argument -- confirm against
# Ncp.produce.
output_ncp = ncp_instance.produce([g], 1, timeout_ncp=1000, nthreads=20, multi_threads=True)
end = time.perf_counter()
print(end - start)

# Example of flow-based local graph partitioning

In [None]:
# Reload the Johns Hopkins graph (earlier cells may have rebound `g`).
g = graph_class_local.GraphLocal('JohnsHopkins.edgelist', 'edgelist', '\t')

# Instantiate the C++ version of Capacity Releasing Diffusion.
crd_fast = capacity_releasing_diffusion_fast.Capacity_Releasing_Diffusion_fast()

# Run it with node 3215 passed in the second argument (presumably the seed
# node -- confirm against the method's documentation).
output_crd_fast = crd_fast.produce([g], [3215])

# More graph methods: Flow-based local graph clustering 

In [None]:
# How to use subgraph partitioning with the MQI method.

g = graph_class_local.GraphLocal('minnesota.edgelist','edgelist',' ')

# Create an object for subgraph node partitioning.
# The instance gets its own name: binding it to `MQI_fast` would shadow the
# `MQI_fast` module and break any later `MQI_fast.MQI_fast()` call.
MQI_fast_obj = MQI_fast.MQI_fast()

# The initial cut we want to improve
R = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,73,74,75,76,77,78,79,80,81,82,83,84,85,87,88,89,90,91,92,93,94,95,97,98,99,100,102,103,104,105,106,108,112,114,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,139,140,141,142,143,144,145,147,148,149,150,151,152,155,157,158,159,160,161,162,164,165,166,168,169,171,172,173,176,177,178,179,180,185,187,188,191,192,195,196,197,201,208,209,210,211,212,215,217,218,219,221,223,225,226,227,228,231,232,244,245,246,247,248,249,253,254,257,261,262,265,269,270,271,272,273,275,276,277,278,279,285,286,287,290,291,299,303,323,327]

# Run MQI on the graph with R as the set to improve (passed wrapped in a list).
output_MQI_fast = MQI_fast_obj.produce([g],[R])

# More graph methods: densest subgraph 

In [None]:
# How to use the densest subgraph method.
# Load the Erdos02 graph from a space-separated edge list (rebinds `g`).
g = graph_class_local.GraphLocal('Erdos02-cc.edgelist', 'edgelist', ' ')

# Instantiate the densest subgraph method.
ds_fast = densest_subgraph_fast.densest_subgraph_fast()

# Run it on the graph; the result is kept in `output_ds_fast`.
output_ds_fast = ds_fast.produce([g])

In [37]:
# For every connected component of `g`: sample a fraction of its nodes, use
# each sampled node's neighbourhood as a seed set R, improve R with MQI, and
# record the conductance and isoperimetry of both R and the improved set.

# Fraction of each component's nodes that is sampled as seeds.
ratio = 0.1

# Per-seed results, keyed by the node's id in the original graph `g`.
conductance_vs_node_R = {}
isoperimetry_vs_node_R = {}
conductance_vs_node_output = {}
isoperimetry_vs_node_output = {}

MQI_fast_obj = MQI_fast.MQI_fast()

# Populates g.number_of_components and g.components (and prints the count).
g.connected_components()
number_of_components = g.number_of_components

# Per-component results: one dict per component, mapping a volume (or size)
# to the smallest conductance (or isoperimetry) seen for that volume (size).
# Previously the size tables were never filled because the initialisation
# loop appended to undefined `conductance_vs_size_*` names.
conductance_vs_vol_R = [{} for _ in range(number_of_components)]
isoperimetry_vs_size_R = [{} for _ in range(number_of_components)]
conductance_vs_vol_output = [{} for _ in range(number_of_components)]
isoperimetry_vs_size_output = [{} for _ in range(number_of_components)]


def _keep_minimum(table, key, value):
    """Store `value` under `key` unless a strictly smaller value is already there."""
    if key not in table or value <= table[key]:
        table[key] = value


for cmp in range(number_of_components):

    # Build a standalone graph for this component. Row/column k of g_copy
    # corresponds to node nodes_of_component[k] of g.
    nodes_of_component = list(g.components[cmp])
    g_copy = graph_class_local.GraphLocal()
    g_copy.adjacency_matrix = g.adjacency_matrix[nodes_of_component,:].tocsc()[:,nodes_of_component].tocsr()
    # Presumably fills g_copy.d, g_copy.vol_G and g_copy.n used below --
    # confirm against GraphLocal.compute_statistics.
    g_copy.compute_statistics()

    n = g_copy.adjacency_matrix.shape[0]

    # Number of seeds: ceil(ratio * n), never more than n.
    n_nodes = int(min(np.ceil(ratio*n), n))

    # Seeds are drawn without replacement, as indices local to g_copy.
    nodes = np.random.choice(np.arange(0, n), size=n_nodes, replace=False)

    for node in nodes:
        # Seed set: the neighbours of `node` inside this component. Everything
        # below is indexed in g_copy's local numbering, so the neighbourhood
        # and the MQI call must use g_copy as well (not g).
        R = g_copy.adjacency_matrix[:,node].nonzero()[0].tolist()
        if not R:
            # A node without neighbours gives an empty seed set, for which
            # volume, conductance and isoperimetry are undefined.
            continue

        output_MQI_fast = MQI_fast_obj.produce([g_copy],[R])
        output = output_MQI_fast[0][0].tolist()

        # 0/1 indicator vectors of the seed set and of the improved set.
        v_ones_R = np.zeros(n)
        v_ones_R[R] = 1

        v_ones_output = np.zeros(n)
        v_ones_output[output] = 1

        vol_R = sum(g_copy.d[R])
        size_R = len(R)

        vol_output = sum(g_copy.d[output])
        size_output = len(output)

        # cut(S) = vol(S) - 1_S^T A 1_S
        cut_R = vol_R - np.dot(v_ones_R, g_copy.adjacency_matrix.dot(v_ones_R))
        cut_output = vol_output - np.dot(v_ones_output, g_copy.adjacency_matrix.dot(v_ones_output))

        # conductance(S) = cut(S) / min(vol(S), vol(G) - vol(S))
        cond_R = cut_R/min(vol_R, g_copy.vol_G - vol_R)
        cond_output = cut_output/min(vol_output, g_copy.vol_G - vol_output)

        # isoperimetry(S) = cut(S) / min(|S|, n - |S|)
        isop_R = cut_R/min(size_R, g_copy.n - size_R)
        isop_output = cut_output/min(size_output, g_copy.n - size_output)

        # Key per-seed results by the node's id in the original graph so that
        # seeds from different components cannot overwrite each other.
        node_id = nodes_of_component[node]
        conductance_vs_node_R[node_id] = cond_R
        conductance_vs_node_output[node_id] = cond_output
        isoperimetry_vs_node_R[node_id] = isop_R
        isoperimetry_vs_node_output[node_id] = isop_output

        # For each volume / size keep only the best (smallest) value seen.
        _keep_minimum(conductance_vs_vol_R[cmp], vol_R, cond_R)
        _keep_minimum(isoperimetry_vs_size_R[cmp], size_R, isop_R)
        _keep_minimum(conductance_vs_vol_output[cmp], vol_output, cond_output)
        _keep_minimum(isoperimetry_vs_size_output[cmp], size_output, isop_output)

There are  1  connected components in the graph


