Skip to content

Commit

Permalink
New partial function comparison for recursion
Browse files Browse the repository at this point in the history
  • Loading branch information
dgleich committed Nov 8, 2018
1 parent 865c97e commit 07e8a13
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 16 deletions.
2 changes: 1 addition & 1 deletion localgraphclustering/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .approximate_PageRank import approximate_PageRank
from .approximate_PageRank_weighted import approximate_PageRank_weighted
from .sweep_cut import sweep_cut
from .ncp import NCPData
from .ncp import NCPData, partialfunc
from .ncpplots import NCPPlots
from .densest_subgraph import densest_subgraph
from .multiclass_label_prediction import multiclass_label_prediction
Expand Down
37 changes: 28 additions & 9 deletions localgraphclustering/ncp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@
from .triangleclusters import triangleclusters
from .cpp import *

class partialfunc(functools.partial):
    """A functools.partial subclass whose instances compare by value.

    Two instances are equal when their ``func``, ``args`` and ``keywords``
    attributes are all equal. The hash is the one inherited from
    functools.partial, so two instances that compare equal may still hash
    differently.
    """

    @classmethod
    def from_partial(cls, f):
        """Build an instance of this class from an existing partial ``f``.

        The stored positional and keyword arguments of ``f`` are unpacked,
        so calling the result invokes ``f.func`` with the same arguments
        that calling ``f`` would.
        """
        return cls(f.func, *f.args, **f.keywords)

    def __eq__(self, f2):
        """Return True if ``f2`` is a partialfunc with equal func, args and keywords."""
        if not isinstance(f2, partialfunc):
            return False
        return all(getattr(self, attr) == getattr(f2, attr)
                   for attr in ('func', 'args', 'keywords'))

    # Defining __eq__ in a class body sets __hash__ to None unless it is
    # assigned explicitly; reuse the parent's hash so instances remain
    # usable as dictionary keys.
    __hash__ = functools.partial.__hash__

class SimpleLogForLongComputations:
""" Implement a simple logger that will record messages and then
replay them if a timer exceeds a threshold."""
Expand Down Expand Up @@ -51,6 +61,7 @@ def log(self, message):
self._log.append((t, message))

def _partial_functions_equal(func1, func2):
assert(False) # shouldn't be called now
if not (isinstance(func1, functools.partial) and isinstance(func2, functools.partial)):
return False
are_equal = all([getattr(func1, attr) == getattr(func2, attr) for attr in ['func', 'args', 'keywords']])
Expand Down Expand Up @@ -388,7 +399,15 @@ def as_data_frame(self):
""" Return the NCP results as a pandas dataframe """
df = pd.DataFrame.from_records(self.results, columns=self.result_fields)
# convert to human readable names
df["method"] = df["methodfunc"].map(self.method_names)
# It's important that this dictionary is converted into a lookup
# function so the pandas map function works correctly with our
# partial functions that may hash differently but compare as equal.
# Ideally, we'd call...
# df["method"] = df["methodfunc"].map(self.method_names)
df["method"] = df["methodfunc"].map(lambda x: self.method_names[x])
# TODO, since this is a bit hacky, it's probably worth storing
# the method name in the results itself. That's probably better at
# this point

return df

Expand Down Expand Up @@ -501,7 +520,7 @@ def approxPageRank(self,
if localmins:
for rho in rholist:
self.add_localmin_samples(
method=functools.partial(
method=partialfunc(
spectral_clustering,**spectral_args,alpha=alpha,rho=rho*10,method=method),
methodname="%s_localmin:rho=%.0e"%(methodname, rho*10),
neighborhoods=True,
Expand All @@ -516,7 +535,7 @@ def approxPageRank(self,
if myratio is not None:
kwargs['ratio'] = myratio
self.add_random_node_samples(
method=functools.partial(
method=partialfunc(
spectral_clustering,**spectral_args,alpha=alpha,rho=rho,method=method),
methodname="%s:rho=%.0e"%(methodname, rho),
timeout=timeout/(nruns*len(rholist)), **kwargs)
Expand All @@ -528,7 +547,7 @@ def approxPageRank(self,
if myratio is not None:
kwargs['ratio'] = myratio
self.add_random_neighborhood_samples(
method=functools.partial(
method=partialfunc(
spectral_clustering,**spectral_args,alpha=alpha,rho=rho*10,method=method),
methodname="%s_neighborhoods:rho=%.0e"%(methodname, rho*10),
timeout=timeout/(len(rholist)), **kwargs)
Expand Down Expand Up @@ -570,7 +589,7 @@ def l1reg(self,
nthreads: int = 4,
timeout: float = 1000):
alpha = 1.0-1.0/(1.0+gamma)
funcs = {functools.partial(spectral_clustering, alpha=alpha,rho=rho,method="l1reg"):'l1reg;rho=%.0e'%(rho)
funcs = {partialfunc(spectral_clustering, alpha=alpha,rho=rho,method="l1reg"):'l1reg;rho=%.0e'%(rho)
for rho in rholist}
for func in funcs.keys():
self.add_random_node_samples(method=func,methodname=funcs[func],ratio=ratio,nthreads=nthreads,timeout=timeout/len(funcs))
Expand All @@ -583,7 +602,7 @@ def crd(self,
ratio: float = 0.3,
nthreads: int = 4,
timeout: float = 1000):
func = functools.partial(flow_clustering,w=w, U=U, h=h,method="crd")
func = partialfunc(flow_clustering,w=w, U=U, h=h,method="crd")
self.add_random_neighborhood_samples(method=func,methodname="crd",
ratio=ratio,nthreads=nthreads,timeout=timeout/2)
self.add_random_node_samples(method=func,methodname="crd",
Expand All @@ -594,7 +613,7 @@ def mqi(self,
ratio: float = 0.3,
nthreads: int = 4,
timeout: float = 1000):
func = functools.partial(flow_clustering,method="mqi")
func = partialfunc(flow_clustering,method="mqi")
self.add_random_neighborhood_samples(ratio=ratio,nthreads=nthreads,timeout=timeout,
method=func,methodname="mqi")
return self
Expand All @@ -609,12 +628,12 @@ def add_fiedler(self):
# note that we use functools partial here to create a new function
# that we name "fiedler" even though the code is just evaluate_set
return self.add_set_samples(methodname="fiedler",
method=functools.partial(_evaluate_set), nthreads=1, sets=[S])
method=partialfunc(_evaluate_set), nthreads=1, sets=[S])

def add_fiedler_mqi(self):
S = self._fiedler_set()
return self.add_set_samples(methodname="fiedler-mqi",
method=functools.partial(flow_clustering,method="mqi"), nthreads=1, sets=[S])
method=partialfunc(flow_clustering,method="mqi"), nthreads=1, sets=[S])

def add_neighborhoods(self, **kwargs):
return self.add_random_neighborhood_samples(
Expand Down
6 changes: 6 additions & 0 deletions localgraphclustering/spectral_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def spectral_clustering(G, ref_nodes,
ys: Sequence[float] = None,
vol: float = 100,
phi: float = 0.5,
refine = None,
method: str = "acl"):
"""
Provide a simple interface to do spectral based clustering.
Expand All @@ -33,6 +34,8 @@ def spectral_clustering(G, ref_nodes,
Which method to use for the nodes embedding.
Options: "acl", "l1reg", "nibble", "fiedler", "fiedler_local"
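refine:
Optional callable applied as refine(G, list(output[0])) to the sweep-cut result; when given, its return value replaces the output. Default is None (no refinement).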
Extra parameters for "acl" and "l1reg" (optional)
-------------------------------------------------
Expand Down Expand Up @@ -107,4 +110,7 @@ def spectral_clustering(G, ref_nodes,

output = sweep_cut(G,p)

if refine is not None:
output = refine(G,list(output[0]))

return output
18 changes: 12 additions & 6 deletions localgraphclustering/tests/test_ncp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import localgraphclustering as lgc
import pytest
from functools import partial
#from functools import partial

def load_example_graph():
    """Load the dolphins edge list from the test data directory as a GraphLocal."""
    edge_file = "localgraphclustering/tests/data/dolphins.edges"
    return lgc.GraphLocal(edge_file, separator=" ")
Expand All @@ -14,7 +14,7 @@ def test_ncp():
df = ncp.as_data_frame()
assert len(df) == G._num_vertices
#func = lambda G,R: lgc.flow_clustering(G,R,method="mqi")[0]
func = partial(lgc.flow_clustering, method="mqi")
func = lgc.partialfunc(lgc.flow_clustering, method="mqi")
ncp = lgc.NCPData(G)
ncp.add_set_samples([[1]],nthreads=1,method=func,methodname="mqi")
ncp.add_random_neighborhood_samples(ratio=2,nthreads=1,method=func,methodname="mqi")
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_ncp_l1reg():
def test_ncp_localmin():
G = load_example_graph()
ncp = lgc.NCPData(G)
func = partial(lgc.spectral_clustering,alpha=0.01,rho=1.0e-4,method="acl")
func = lgc.partialfunc(lgc.spectral_clustering,alpha=0.01,rho=1.0e-4,method="acl")

ncp.default_method = func
ncp.add_localmin_samples(ratio=1)
Expand All @@ -93,7 +93,7 @@ def test_ncp_localmin():
G = lgc.GraphLocal()
G.list_to_gl([0,1],[1,0],[1,1])
ncp = lgc.NCPData(G)
func = partial(lgc.spectral_clustering,alpha=0.01,rho=1.0e-4,method="acl")
func = lgc.partialfunc(lgc.spectral_clustering,alpha=0.01,rho=1.0e-4,method="acl")

ncp.default_method = func
ncp.add_localmin_samples(ratio=1)
Expand All @@ -109,10 +109,16 @@ def test_ncp_sets():
def test_apr_deep():
    """Smoke test: approxPageRank runs on the example graph with deep=True."""
    graph = load_example_graph()
    ncp = lgc.NCPData(graph)
    ncp.approxPageRank(ratio=1, gamma=0.1, rholist=[1e-2, 1e-3], deep=True)

def test_apr_only_node_samples():
G = load_example_graph()
df = lgc.NCPData(G).approxPageRank(ratio=1, gamma=0.1, rholist=[1e-2, 1e-3], random_neighborhoods=False, localmins=False)
df = lgc.NCPData(G).approxPageRank(ratio=1, gamma=0.1, rholist=[1e-2, 1e-3], random_neighborhoods=False, localmins=False)

def test_apr_refine():
    """Smoke test: approxPageRank accepts a 'refine' callable through spectral_args."""
    graph = load_example_graph()
    ncp = lgc.NCPData(graph)
    # The refinement step is flow_clustering with method="mqi".
    mqi_refine = lgc.partialfunc(lgc.flow_clustering, method="mqi")
    ncp.approxPageRank(
        ratio=1,
        gamma=0.1,
        rholist=[1e-2, 1e-3],
        random_neighborhoods=False,
        localmins=False,
        spectral_args={'refine': mqi_refine},
    )

@pytest.mark.long_tests
def test_ncp_crd_big():
Expand Down

0 comments on commit 07e8a13

Please sign in to comment.