In [1]:
import localgraphclustering as lgc
import numpy as np

import matplotlib.pyplot as plt


import sys, traceback
import os
sys.path.insert(0, os.path.join("..", "LocalGraphClustering", "notebooks"))
import helper
import pickle
np.random.seed(seed=123)

In [2]:
# Display the graph names exposed by the helper module; these are the names
# the driver loops below pass to helper.lgc_data().
helper.lgc_graphlist

['senate',
 'Erdos02',
 'JohnsHopkins',
 'Colgate88',
 'usroads',
 'ppi_mips',
 'ASTRAL',
 'sfld',
 'find_V',
 'ppi-homo',
 'neuro-fmri-01',
 'ca-GrQc',
 'dolphins',
 'disconnected']

In [3]:
def run_improve(g, gname, method, methodname, delta, nthreads=1, timeout=1000, ratio=0.00005):
    """Build an approxPageRank NCP for `g`, refine its clusters, then plot and pickle.

    Steps:
      1. Run ``NCPData.approxPageRank`` on `g` with output clusters stored.
      2. Feed those clusters to ``NCPData.refine`` on a second NCPData object.
      3. Save the input/output conductance plot to ``figures/`` and show it.
      4. Pickle both NCPData objects to ``results/``.

    Parameters
    ----------
    g : graph object accepted by ``lgc.NCPData``; its ``_num_vertices``
        attribute is read when ``ratio`` is None.
    gname : str
        Graph name, used in the plot title and in output file names.
    method : str
        Passed to ``refine`` as ``method`` and used in output file names.
    methodname : str
        Passed to ``refine`` as ``methodname`` and used in the plot title.
    delta : number
        Forwarded to ``refine`` as the ``delta`` keyword argument.
    nthreads : int
        Forwarded to both ``approxPageRank`` and ``refine``.
    timeout : number
        Forwarded to ``refine``.
    ratio : float or None
        Sampling ratio forwarded to ``approxPageRank``. The default keeps the
        fixed value this notebook has always used. Pass ``None`` to pick a
        ratio from the graph size instead (smaller ratio for larger graphs).

    Returns
    -------
    None. Results are written to ``figures/`` and ``results/``.
    """
    if ratio is None:
        # Size-based heuristic: sample a smaller fraction of larger graphs.
        num_vertices = g._num_vertices
        if num_vertices > 1000000:
            ratio = 0.05
        elif num_vertices > 100000:
            ratio = 0.1
        elif num_vertices > 10000:
            ratio = 0.4
        elif num_vertices > 7500:
            ratio = 0.6
        elif num_vertices > 5000:
            ratio = 0.8
        else:
            ratio = 1.0
    print("ratio: ", ratio)

    ncp = lgc.NCPData(g, store_output_clusters=True)
    ncp.approxPageRank(ratio=ratio, nthreads=nthreads, localmins=False,
                       neighborhoods=False, random_neighborhoods=False)
    sets = [st["output_cluster"] for st in ncp.results]

    print("Make an NCP object for Improve Algo")
    ncp2 = lgc.NCPData(g)
    print("Going into improve mode")
    try:
        ncp2.refine(sets, method=method, methodname=methodname,
                    nthreads=nthreads, timeout=timeout, delta=delta)
    except Exception:
        # Best effort: report the failure and still plot/pickle whatever
        # ncp2 managed to collect, so a driver loop over many graphs keeps going.
        print("Exception in user code:")
        print('-'*60)
        traceback.print_exc(file=sys.stdout)
        print('-'*60)

    # Make sure the output directories exist before writing into them.
    os.makedirs("figures", exist_ok=True)
    os.makedirs("results", exist_ok=True)

    fig = lgc.NCPPlots(ncp2).mqi_input_output_cond_plot()[0]
    fig.axes[0].set_title(gname + " " + methodname+"-NCP")
    # `figsize` is not a savefig option, so it is not passed here.
    fig.savefig("figures/" + method + "-ncp-"+gname+".pdf", bbox_inches="tight")
    plt.show()
    plt.close(fig)

    # File names carry the actual method and graph name so that runs for
    # different graphs/methods do not overwrite each other.
    with open("results/" + method + "-ncp-" + gname + ".pickle", "wb") as fh:
        pickle.dump(ncp, fh)
    with open("results/" + method + "-ncp2-" + gname + ".pickle", "wb") as fh:
        pickle.dump(ncp2, fh)

In [None]:
## This is a test

# Smoke test: run the MQI improve pipeline on the single graph "ppi-homo".
# NOTE: `gname` and `g` stay bound in the notebook namespace after the loop.
for gname in ["ppi-homo"]:
    g = helper.lgc_data(gname)
    # presumably turns the graph into an unweighted one — confirm in GraphLocal
    g.discard_weights()
    # delta is forwarded by run_improve to NCPData.refine as a keyword argument.
    run_improve(g, gname=gname, method="mqi", methodname="MQI", delta=0.3, timeout=100000000)

ratio:  5e-05
There are  1  connected components in the graph
f2:  None
Mpikes?
Run target to get the rval. Workid:  0
target:  <function _ncp_node_worker at 0x11df1b2f0>
Finished running
f2:  None
Mpikes?
Run target to get the rval. Workid:  0
target:  <function _ncp_node_worker at 0x11df1b2f0>
Finished running
f2:  None
Mpikes?
Run target to get the rval. Workid:  0
target:  <function _ncp_neighborhood_worker at 0x11df1b378>
Finished running
f2:  None
Mpikes?
Run target to get the rval. Workid:  0
target:  <function _ncp_neighborhood_worker at 0x11df1b378>
Finished running
Make an NCP object for Improve Algo
There are  1  connected components in the graph
Going into improve mode
Set func:  MQI
f2:  None
Mpikes?
Run samples
Run target to get the rval. Workid:  0
target:  <function _ncp_set_worker at 0x11df1b400>
About to run MQI_cpp
vtype,itype,uint32,uint32
<_FuncPtr object at 0x11d1168e0>
n  8887  nR  3546  ai  [    0    55    79 ... 65406 65413 65416]  aj  [  47  220  223 ... 2844 

## Generate improve plots for MQI

In [None]:
# Run the MQI improve pipeline on every graph listed in helper.lgc_graphlist.
# NOTE: `gname` and `g` stay bound in the notebook namespace after the loop.
for gname in helper.lgc_graphlist:
    print("Name: ", gname)
    g = helper.lgc_data(gname)
    # presumably turns the graph into an unweighted one — confirm in GraphLocal
    g.discard_weights()
    run_improve(g, gname=gname, method="mqi", methodname="MQI", delta=0.3, timeout=100000000)

In [None]:
Python(71405,0x115c265c0) malloc: Incorrect checksum for freed object 0x7f89aca13fd8: probably modified after being freed.
Corrupt value: 0xbb00001703
Python(71417,0x11231e5c0) malloc: Incorrect checksum for freed object 0x7fb9ec3e2618: probably modified after being freed.
Corrupt value: 0xbb00001703

## Generate improve plots for MQI for large-scale datasets

In [None]:
# Large-scale graphs for the MQI run, keyed by a short name. A value is either
# a path string or a (path, separator) tuple.
# NOTE(review): these are absolute paths on one machine; consider a
# configurable data directory.
mygraphs = {#'orkut':'/Users/kimonfountoulakis/Downloads/com-orkut.ungraph.edgelist',
            'pokec':'/Users/kimonfountoulakis/Downloads/soc-pokec-relationships.edgelist'
            #'livejournal':'/Users/kimonfountoulakis/Downloads/soc-LiveJournal1.edgelist',
            #'email-Enron':'/Users/kimonfountoulakis/Downloads/Email-Enron.edgelist'
           }

for (gname, gfile) in mygraphs.items():
    print(gname, gfile)
    # Edge lists are read as tab-separated unless the entry supplies its own
    # separator via a (path, separator) tuple.
    sep = "\t"
    if isinstance(gfile, tuple):
        gfile, sep = gfile[0], gfile[1]
    # os.path.join drops the leading "../data" components when gfile is an
    # absolute path, so absolute entries are used as-is.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'edgelist', sep)
    g.discard_weights()
    run_improve(g, gname=gname, method="mqi", methodname="MQI", delta=100, timeout=100000000)

pokec /Users/kimonfountoulakis/Downloads/soc-pokec-relationships.edgelist
ratio:  5e-05
There are  2  connected components in the graph



The graph has multiple (2) components, using the largest with 1632803 / 1632804 nodes



Make an NCP object for Improve Algo
There are  2  connected components in the graph


In [None]:
## This is a test for SimpleLocal

# Smoke test: run the SimpleLocal ("sl") improve pipeline on "ppi-homo" only,
# with a short timeout forwarded to NCPData.refine.
# NOTE: `gname` and `g` stay bound in the notebook namespace after the loop.
for gname in ["ppi-homo"]:
    g = helper.lgc_data(gname)
    # presumably turns the graph into an unweighted one — confirm in GraphLocal
    g.discard_weights()
    run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.3, timeout=10)

## Generate improve plots for SimpleLocal with delta = 0.3

In [None]:
# Run the SimpleLocal improve pipeline with delta = 0.3 on every graph listed
# in helper.lgc_graphlist.
# NOTE: `gname` and `g` stay bound in the notebook namespace after the loop.
for gname in helper.lgc_graphlist:
    print("Name: ", gname)
    g = helper.lgc_data(gname)
    # presumably turns the graph into an unweighted one — confirm in GraphLocal
    g.discard_weights()
    run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.3, timeout=100000000)

## Generate improve plots for SimpleLocal for large-scale datasets with delta = 0.3

In [None]:
# Large-scale graphs for SimpleLocal with delta = 0.3, keyed by a short name.
# A value is either a path string or a (path, separator) tuple.
# NOTE(review): these are absolute paths on one machine; consider a
# configurable data directory.
mygraphs = {#'orkut':'/Users/kimonfountoulakis/Downloads/com-orkut.ungraph.edgelist',
            'pokec':'/Users/kimonfountoulakis/Downloads/soc-pokec-relationships.edgelist',
            'livejournal':'/Users/kimonfountoulakis/Downloads/soc-LiveJournal1.edgelist',
            'email-Enron':'/Users/kimonfountoulakis/Downloads/Email-Enron.edgelist'
           }

for (gname, gfile) in mygraphs.items():
    print(gname, gfile)
    # Edge lists are read as tab-separated unless the entry supplies its own
    # separator via a (path, separator) tuple.
    sep = "\t"
    if isinstance(gfile, tuple):
        gfile, sep = gfile[0], gfile[1]
    # os.path.join drops the leading "../data" components when gfile is an
    # absolute path, so absolute entries are used as-is.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'edgelist', sep)
    g.discard_weights()
    # The improve run is currently disabled; this cell only loads each graph.
    #run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.3, timeout=100000000)

## Generate improve plots for SimpleLocal with delta = 0.6

In [None]:
# Run the SimpleLocal improve pipeline with delta = 0.6 on every graph listed
# in helper.lgc_graphlist.
# NOTE: `gname` and `g` stay bound in the notebook namespace after the loop.
for gname in helper.lgc_graphlist:
    print("Name: ", gname)
    g = helper.lgc_data(gname)
    # presumably turns the graph into an unweighted one — confirm in GraphLocal
    g.discard_weights()
    run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.6, timeout=100000000)

## Generate improve plots for SimpleLocal for large-scale datasets with delta = 0.6

In [None]:
# Large-scale graphs for SimpleLocal with delta = 0.6, keyed by a short name.
# A value is either a path string or a (path, separator) tuple.
# NOTE(review): these are absolute paths on one machine; consider a
# configurable data directory.
mygraphs = {'orkut':'/Users/kimonfountoulakis/Downloads/com-orkut.ungraph.edgelist',
            'pokec':'/Users/kimonfountoulakis/Downloads/soc-pokec-relationships.edgelist',
            'livejournal':'/Users/kimonfountoulakis/Downloads/soc-LiveJournal1.edgelist',
            'email-Enron':'/Users/kimonfountoulakis/Downloads/Email-Enron.edgelist'
           }

for (gname, gfile) in mygraphs.items():
    print(gname, gfile)
    # Edge lists are read as tab-separated unless the entry supplies its own
    # separator via a (path, separator) tuple.
    sep = "\t"
    if isinstance(gfile, tuple):
        gfile, sep = gfile[0], gfile[1]
    # os.path.join drops the leading "../data" components when gfile is an
    # absolute path, so absolute entries are used as-is.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'edgelist', sep)
    g.discard_weights()
    run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.6, timeout=100000000)

## Generate improve plots for SimpleLocal with delta = 0.9

In [None]:
# Run the SimpleLocal improve pipeline with delta = 0.9 on every graph listed
# in helper.lgc_graphlist.
# NOTE: `gname` and `g` stay bound in the notebook namespace after the loop.
for gname in helper.lgc_graphlist:
    print("Name: ", gname)
    g = helper.lgc_data(gname)
    # presumably turns the graph into an unweighted one — confirm in GraphLocal
    g.discard_weights()
    run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.9, timeout=100000000)

## Generate improve plots for SimpleLocal for large-scale datasets with delta = 0.9

In [None]:
# Large-scale graphs for SimpleLocal with delta = 0.9, keyed by a short name.
# A value is either a path string or a (path, separator) tuple.
# NOTE(review): these are absolute paths on one machine; consider a
# configurable data directory.
mygraphs = {'orkut':'/Users/kimonfountoulakis/Downloads/com-orkut.ungraph.edgelist',
            'pokec':'/Users/kimonfountoulakis/Downloads/soc-pokec-relationships.edgelist',
            'livejournal':'/Users/kimonfountoulakis/Downloads/soc-LiveJournal1.edgelist',
            'email-Enron':'/Users/kimonfountoulakis/Downloads/Email-Enron.edgelist'
           }

for (gname, gfile) in mygraphs.items():
    print(gname, gfile)
    # Edge lists are read as tab-separated unless the entry supplies its own
    # separator via a (path, separator) tuple.
    sep = "\t"
    if isinstance(gfile, tuple):
        gfile, sep = gfile[0], gfile[1]
    # os.path.join drops the leading "../data" components when gfile is an
    # absolute path, so absolute entries are used as-is.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'edgelist', sep)
    g.discard_weights()
    # The improve run is currently disabled; this cell only loads each graph.
    #run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.9, timeout=100000000)

In [None]:
from typing import *
import localgraphclustering as lgc
import numpy as np

import matplotlib.pyplot as plt
import functools

import sys
import os
sys.path.insert(0, os.path.join("..", "LocalGraphClustering", "notebooks"))
import helper

# Thread count passed to approxPageRank and add_set_samples in run_refine_ncp.
NTHREADS=1
# NOTE(review): NITER is not referenced anywhere in the visible notebook.
NITER=1000000000

def run_refine_ncp(gname, gfile):
    """Compute an approxPageRank NCP and an MQI-refined NCP for one graph.

    The parameter names are historical. The call site in this notebook invokes
    ``run_refine_ncp(g, gname)``, so the graph arrives in the first positional
    slot. When the first argument is a ``lgc.GraphLocal`` it is used as the
    graph; otherwise the function falls back to the notebook-level ``g``, as
    the body previously always did. The second argument is not used.

    Returns
    -------
    (ncp, ncpR) : the NCPData produced by ``approxPageRank`` and a second
        NCPData populated by ``add_set_samples`` with the MQI flow method.
    """
    # Prefer the graph that was actually passed in over hidden global state.
    graph = gname if isinstance(gname, lgc.GraphLocal) else g

    ncp = lgc.NCPData(graph).approxPageRank(localmins=False, neighborhoods=False,
                                            random_neighborhoods=False,
                                            nthreads=NTHREADS, ratio=0.0005)
    # First element of each output_set(i) result is the cluster fed to MQI.
    sets = [ncp.output_set(i)[0] for i in range(len(ncp.results))]

    ncpR = lgc.NCPData(graph)
    ncpR.add_set_samples(sets,
                         method=lgc.partialfunc(lgc.flow_clustering, method="mqi"),
                         nthreads=NTHREADS)
    return ncp, ncpR
    
# Graphs for the refine-NCP run, keyed by a short name. A value is either a
# path string or a (path, separator) tuple.
# NOTE(review): the active entry is an absolute path on one machine; consider
# a configurable data directory.
mygraphs = {#'digits-1234-3':'digits-1234-3.edges', 
            #'neurosynth':'neurosynth-meta.edges.gz',
            #'wiki-cats':'wiki-cats.edges', 
            'pokec':'/Users/kimonfountoulakis/Downloads/soc-pokec-relationships.edgelist',
            #'livejournal':'soc-LiveJournal-scc.edges.gz',
            #'astro-spectra':('lawlor-spectra-k32.edgelist.gz','\t'),
            #'email-Enron':'email-Enron.edges.gz',
            #'netscience':'netscience-cc.edges'
           }

for (gname, gfile) in mygraphs.items():
    print(gname, gfile)
    # Edge lists are read as tab-separated unless the entry supplies its own
    # separator via a (path, separator) tuple.
    sep = "\t"
    if isinstance(gfile, tuple):
        gfile, sep = gfile[0], gfile[1]
    # os.path.join drops the leading "../data" components when gfile is an
    # absolute path; relative entries resolve under ../data.
    g = lgc.GraphLocal(os.path.join("..", "data", gfile), 'edgelist', sep)
    g.discard_weights()

    # The graph is passed first and its name second, positionally.
    ncp, ncpR = run_refine_ncp(g, gname)

In [None]:
import networkx as nx

In [None]:
# Load the Email-Enron edge list into a networkx graph.
# NOTE(review): absolute path on one machine; adjust before running elsewhere.
gnx = nx.read_edgelist('/Users/kimonfountoulakis/Downloads/Email-Enron.edgelist')

In [None]:
# Write the graph loaded into `gnx` back out in GraphML format.
# NOTE(review): absolute path on one machine; adjust before running elsewhere.
nx.write_graphml(gnx, '/Users/kimonfountoulakis/Downloads/Email-Enron.graphml')