
Commit

New experiments
kfoynt committed Feb 2, 2019
1 parent 8caacdc commit a26e9df
Showing 33 changed files with 28,276 additions and 179,440 deletions.
44 changes: 35 additions & 9 deletions notebooks/NCPs.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -684,14 +684,14 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/site-packages/localgraphclustering/GraphLocal.py:203: UserWarning:\n",
"/usr/local/lib/python3.7/site-packages/localgraphclustering/GraphLocal.py:217: UserWarning:\n",
"\n",
"Loading a graphml is not efficient, we suggest using an edgelist format for this API.\n",
"\n"
@@ -3075,17 +3075,43 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"126146"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g._num_vertices"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"323900"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g._num_edges"
]
},
{
"cell_type": "code",
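The UserWarning above recommends the edgelist format over graphml for this API, and the two new notebook cells simply report the loaded graph's vertex and edge counts. A minimal sketch of that pattern using an edgelist (the file path below is a placeholder, not a file from this commit):

import localgraphclustering as lgc

# Load from an edgelist, as the warning suggests (placeholder path).
g = lgc.GraphLocal('datasets/my-graph.edgelist', 'edgelist', ' ')
g.discard_weights()

# Same checks as the two new cells in NCPs.ipynb.
print(g._num_vertices)   # 126146 for the graph shown in the notebook
print(g._num_edges)      # 323900 for the graph shown in the notebook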
823 changes: 0 additions & 823 deletions notebooks/NCPs_for_flow_paper.ipynb

This file was deleted.

105 changes: 101 additions & 4 deletions notebooks/NCPs_for_flow_paper.py
@@ -38,9 +38,32 @@

pickle.dump(ncp_instance, open('results/ncp-senate.pickle', 'wb'))

mygraphs = {#'email-Enron':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/email-Enron.edgelist',
#'pokec':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-pokec-relationships.edgelist',
'orkut':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/com-orkut.ungraph.edgelist',
# Run us-roads
g = lgc.GraphLocal('./datasets/usroads-cc.graphml','graphml')
g.discard_weights()

ncp_instance = lgc.NCPData(g)
ncp_instance.approxPageRank(ratio=0.8,timeout=5000000,nthreads=24)

ncp_plots = lgc.NCPPlots(ncp_instance,method_name = "acl")
#plot conductance vs size
fig, ax, min_tuples = ncp_plots.cond_by_size()
plt.savefig('figures/cond_card_usroads.png', bbox_inches='tight')
plt.show()
#plot conductance vs volume
fig, ax, min_tuples = ncp_plots.cond_by_vol()
plt.savefig('figures/cond_vol_usroads.png', bbox_inches='tight')
plt.show()
#plot isoperimetry vs size
fig, ax, min_tuples = ncp_plots.isop_by_size()
plt.savefig('figures/expand_card_usroads.png', bbox_inches='tight')
plt.show()

pickle.dump(ncp_instance, open('results/ncp-usroads.pickle', 'wb'))
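The pickled NCPData written above can be re-plotted later without rerunning approxPageRank. A minimal sketch, assuming the results/ncp-usroads.pickle file produced by this script, the imports already in scope above, and that NCPData round-trips through pickle as the dumps here imply:

ncp_reloaded = pickle.load(open('results/ncp-usroads.pickle', 'rb'))
ncp_plots = lgc.NCPPlots(ncp_reloaded, method_name="acl")
# Redraw conductance vs. cluster size from the stored NCP samples.
fig, ax, min_tuples = ncp_plots.cond_by_size()
plt.show()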

mygraphs = {'email-Enron':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/email-Enron.edgelist',
'pokec':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-pokec-relationships.edgelist',
#'orkut':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/com-orkut.ungraph.edgelist',
'livejournal':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-LiveJournal1.edgelist'
}

@@ -74,4 +97,78 @@
plt.savefig('figures/expand_card_' + gname + '.png', bbox_inches='tight')
plt.show()

pickle.dump(ncp_instance, open('results/ncp' + gname + '.pickle', 'wb'))
pickle.dump(ncp_instance, open('results/ncp' + gname + '.pickle', 'wb'))
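The driver loop that consumes the mygraphs dictionary is collapsed in this view; only its tail (the plots and the pickle dump) is shown above. A hedged reconstruction of the overall pattern, based on that visible tail and on the analogous loops in the improve_results_conductance scripts further down; the ratio value is illustrative, not taken from the collapsed code:

for (gname, gfile) in mygraphs.items():
    print(gname, gfile)
    g = lgc.GraphLocal(gfile, 'edgelist', ' ')
    g.discard_weights()
    ncp_instance = lgc.NCPData(g)
    ncp_instance.approxPageRank(ratio=0.3, timeout=5000000, nthreads=24)  # ratio is illustrative
    ncp_plots = lgc.NCPPlots(ncp_instance, method_name="acl")
    fig, ax, min_tuples = ncp_plots.cond_by_size()
    plt.savefig('figures/cond_card_' + gname + '.png', bbox_inches='tight')
    pickle.dump(ncp_instance, open('results/ncp' + gname + '.pickle', 'wb'))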


# Run orkut separately
print("Running orkut")

name = '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/com-orkut.ungraph.edgelist'
g = lgc.GraphLocal(os.path.join(data_path,name),'edgelist', " ")

comm_name = '/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/com-orkut.top5000.cmty.txt'
ref_nodes_unfiltered = []
with open(comm_name, "r") as f:
for line in f:
new_line = []
for i in line.split():
if i.isdigit():
new_line.append(int(i))
ref_nodes_unfiltered.append(new_line)


n = g._num_vertices

number_feature = 0

ref_nodes = []
info_ref_nodes = []

for ff in ref_nodes_unfiltered:

vol_ff = sum(g.d[ff])

if vol_ff < 100:
continue

cond_ff = g.compute_conductance(ff,cpp=True)

if cond_ff > 0.47:
continue

print("Reached")
eig_ff, lambda_ff = lgc.fiedler_local(g, ff)
lambda_ff = np.real(lambda_ff)
gap_ff = lambda_ff/cond_ff

print("Number of feature", number_feature, " gap ",gap_ff, " volume: ", vol_ff, " size:", len(ff), "conductance: ", cond_ff)

if gap_ff >= 0.5 and vol_ff >= 100:
ref_nodes.append(ff)
np.save('results/ref_nodes_orkut', ref_nodes)

number_feature += 1
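In plain terms, the loop above keeps a ground-truth orkut community ff only if its volume is at least 100, its conductance is at most 0.47, and the ratio of the local Fiedler value to the conductance is at least 0.5. A compact restatement of that filter for one candidate set, reusing the objects and imports already defined in this script (the comments are editorial glosses):

ff = ref_nodes_unfiltered[0]                    # any one candidate community
vol_ff = sum(g.d[ff])                           # volume = sum of member degrees
cond_ff = g.compute_conductance(ff, cpp=True)   # conductance of the set
eig_ff, lambda_ff = lgc.fiedler_local(g, ff)    # local Fiedler (spectral) value
gap_ff = np.real(lambda_ff) / cond_ff
keep = (vol_ff >= 100) and (cond_ff <= 0.47) and (gap_ff >= 0.5)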

ncp_instance = lgc.NCPData(g)
ncp_instance.approxPageRank(ratio=0.1,timeout=5000000,nthreads=24)
ncp_instance.add_set_samples_without_method(ref_nodes)

ncp_plots = lgc.NCPPlots(ncp_instance,method_name = "")
#plot conductance vs size
fig, ax, min_tuples = ncp_plots.cond_by_size()
counter = 0
for cluster in ref_nodes:
ax.scatter([len(cluster)], [g.compute_conductance(cluster,cpp=True)], c="green", s=250, marker='D',zorder=100000)
counter += 1
plt.savefig('figures/cond_card_orkut.png', bbox_inches='tight')
plt.show()
#plot conductance vs volume
fig, ax, min_tuples = ncp_plots.cond_by_vol()
plt.savefig('figures/cond_vol_orkut.png', bbox_inches='tight')
plt.show()
#plot isoperimetry vs size
fig, ax, min_tuples = ncp_plots.isop_by_size()
plt.savefig('figures/expand_card_orkut.png', bbox_inches='tight')
plt.show()

pickle.dump(ncp_instance, open('results/ncporkut.pickle', 'wb'))
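The arrays and pickles written by this script can be reloaded to redraw the overlay plot without recomputing the orkut NCP. A minimal sketch, assuming the files written above, the imports already in scope, and that the graph g is still in memory (or has been reloaded from the same edgelist); allow_pickle=True is needed in newer numpy because ref_nodes is a ragged list saved as an object array:

ncp_reloaded = pickle.load(open('results/ncporkut.pickle', 'rb'))
ref_nodes = np.load('results/ref_nodes_orkut.npy', allow_pickle=True)
ncp_plots = lgc.NCPPlots(ncp_reloaded, method_name="")
fig, ax, min_tuples = ncp_plots.cond_by_size()
for cluster in ref_nodes:
    # Overlay each retained ground-truth community, as in the loop above.
    ax.scatter([len(cluster)], [g.compute_conductance(list(cluster), cpp=True)],
               c="green", s=250, marker='D', zorder=100000)
plt.show()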
303 changes: 236 additions & 67 deletions notebooks/NCPs_for_flow_paperSecondPart.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion notebooks/examples.ipynb
@@ -3110,7 +3110,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.7.1"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion notebooks/improveType_algorithms_with_visualization.ipynb
@@ -952,7 +952,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.7.1"
}
},
"nbformat": 4,
1,169 changes: 0 additions & 1,169 deletions notebooks/improve_results_conductance.ipynb

This file was deleted.

11 changes: 7 additions & 4 deletions notebooks/improve_results_conductance_MQI.py
@@ -29,13 +29,19 @@ def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
elif g._num_vertices > 5000:
ratio = 0.8
print("ratio: ", ratio)
start = time.time()
ncp = lgc.NCPData(g,store_output_clusters=True)
ncp.approxPageRank(ratio=ratio,nthreads=nthreads,localmins=False,neighborhoods=False,random_neighborhoods=False)
end = time.time()
print("Elapsed time for acl-ncp for dataset ", gname , " is ", end - start, " the method is ", methodname, " delta is ", delta)
sets = [st["output_cluster"] for st in ncp.results]
print("Make an NCP object for Improve Algo")
start2 = time.time()
ncp2 = lgc.NCPData(g)
print("Going into improve mode")
output = ncp2.refine(sets, method=method, methodname=methodname, nthreads=nthreads, timeout=timeout, **{"delta": delta})
end2 = time.time()
print("Elapsed time for improve-ncp for dataset ", gname , " is ", end2 - start2, " the method is ", methodname, " delta is ", delta)
fig = lgc.NCPPlots(ncp2).mqi_input_output_cond_plot()[0]
fig.axes[0].set_title(gname + " " + methodname+"-NCP")
fig.savefig("figures/" + method + "-ncp-"+gname+".pdf", bbox_inches="tight", figsize=(100,100))
@@ -49,7 +55,6 @@ def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
'livejournal':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-LiveJournal1.edgelist'
}

start = time.time()
for (gname,gfile) in mygraphs.items():
print(gname, gfile)
sep = ' '
@@ -58,6 +63,4 @@
gfile = gfile[0]
g = lgc.GraphLocal(os.path.join("..", "data", gfile),'edgelist', " ")
g.discard_weights()
run_improve(g, gname=gname, method="mqi", methodname="MQI", delta=100, timeout=100000000)
end = time.time()
print("Elapsed time for ", gname , " is ", end - start)
run_improve(g, gname=gname, method="mqi", methodname="MQI", delta=100, timeout=100000000)
@@ -29,13 +29,19 @@ def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
elif g._num_vertices > 5000:
ratio = 0.8
print("ratio: ", ratio)
start = time.time()
ncp = lgc.NCPData(g,store_output_clusters=True)
ncp.approxPageRank(ratio=ratio,nthreads=nthreads,localmins=False,neighborhoods=False,random_neighborhoods=False)
end = time.time()
print("Elapsed time for acl-ncp for dataset ", gname , " is ", end - start, " the method is ", methodname, " delta is ", delta)
sets = [st["output_cluster"] for st in ncp.results]
print("Make an NCP object for Improve Algo")
start2 = time.time()
ncp2 = lgc.NCPData(g)
print("Going into improve mode")
output = ncp2.refine(sets, method=method, methodname=methodname, nthreads=nthreads, timeout=timeout, **{"delta": delta})
end2 = time.time()
print("Elapsed time for improve-ncp for dataset ", gname , " is ", end2 - start2, " the method is ", methodname, " delta is ", delta)
fig = lgc.NCPPlots(ncp2).mqi_input_output_cond_plot()[0]
fig.axes[0].set_title(gname + " " + methodname+"-NCP")
fig.savefig("figures/" + method + "delta" + str(delta) + "-ncp-"+gname+".pdf", bbox_inches="tight", figsize=(100,100))
@@ -48,7 +54,6 @@ def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
'livejournal':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-LiveJournal1.edgelist'
}

start = time.time()
for (gname,gfile) in mygraphs.items():
print(gname, gfile)
sep = ' '
@@ -57,6 +62,4 @@
gfile = gfile[0]
g = lgc.GraphLocal(os.path.join("..", "data", gfile),'edgelist', " ")
g.discard_weights()
run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.3, timeout=100000000)
end = time.time()
print("Elapsed time for ", gname , " is ", end - start)
run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.3, timeout=100000000)
@@ -29,13 +29,19 @@ def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
elif g._num_vertices > 5000:
ratio = 0.8
print("ratio: ", ratio)
start = time.time()
ncp = lgc.NCPData(g,store_output_clusters=True)
ncp.approxPageRank(ratio=ratio,nthreads=nthreads,localmins=False,neighborhoods=False,random_neighborhoods=False)
end = time.time()
print("Elapsed time for acl-ncp for dataset ", gname , " is ", end - start, " the method is ", methodname, " delta is ", delta)
sets = [st["output_cluster"] for st in ncp.results]
print("Make an NCP object for Improve Algo")
start2 = time.time()
ncp2 = lgc.NCPData(g)
print("Going into improve mode")
output = ncp2.refine(sets, method=method, methodname=methodname, nthreads=nthreads, timeout=timeout, **{"delta": delta})
end2 = time.time()
print("Elapsed time for improve-ncp for dataset ", gname , " is ", end2 - start2, " the method is ", methodname, " delta is ", delta)
fig = lgc.NCPPlots(ncp2).mqi_input_output_cond_plot()[0]
fig.axes[0].set_title(gname + " " + methodname+"-NCP")
fig.savefig("figures/" + method + "delta" + str(delta) + "-ncp-"+gname+".pdf", bbox_inches="tight", figsize=(100,100))
@@ -46,7 +52,6 @@ def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
mygraphs = {'orkut':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/com-orkut.ungraph.edgelist'
}

start = time.time()
for (gname,gfile) in mygraphs.items():
print(gname, gfile)
sep = ' '
@@ -55,6 +60,4 @@
gfile = gfile[0]
g = lgc.GraphLocal(os.path.join("..", "data", gfile),'edgelist', " ")
g.discard_weights()
run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.3, timeout=100000000)
end = time.time()
print("Elapsed time for ", gname , " is ", end - start)
run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.3, timeout=100000000)
@@ -29,13 +29,19 @@ def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
elif g._num_vertices > 5000:
ratio = 0.8
print("ratio: ", ratio)
start = time.time()
ncp = lgc.NCPData(g,store_output_clusters=True)
ncp.approxPageRank(ratio=ratio,nthreads=nthreads,localmins=False,neighborhoods=False,random_neighborhoods=False)
end = time.time()
print("Elapsed time for acl-ncp for dataset ", gname , " is ", end - start, " the method is ", methodname, " delta is ", delta)
sets = [st["output_cluster"] for st in ncp.results]
print("Make an NCP object for Improve Algo")
start2 = time.time()
ncp2 = lgc.NCPData(g)
print("Going into improve mode")
output = ncp2.refine(sets, method=method, methodname=methodname, nthreads=nthreads, timeout=timeout, **{"delta": delta})
end2 = time.time()
print("Elapsed time for improve-ncp for dataset ", gname , " is ", end2 - start2, " the method is ", methodname, " delta is ", delta)
fig = lgc.NCPPlots(ncp2).mqi_input_output_cond_plot()[0]
fig.axes[0].set_title(gname + " " + methodname+"-NCP")
fig.savefig("figures/" + method + "delta" + str(delta) + "-ncp-"+gname+".pdf", bbox_inches="tight", figsize=(100,100))
@@ -48,7 +54,6 @@ def run_improve(g, gname, method, methodname, delta, nthreads=24, timeout=1000):
'livejournal':'/u4/kfountoulakis/flowReviewPaper/LocalGraphClustering/notebooks/datasets/soc-LiveJournal1.edgelist'
}

start = time.time()
for (gname,gfile) in mygraphs.items():
print(gname, gfile)
sep = ' '
@@ -57,6 +62,4 @@
gfile = gfile[0]
g = lgc.GraphLocal(os.path.join("..", "data", gfile),'edgelist', " ")
g.discard_weights()
run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.6, timeout=100000000)
end = time.time()
print("Elapsed time for ", gname , " is ", end - start)
run_improve(g, gname=gname, method="sl", methodname="SimpleLocal", delta=0.6, timeout=100000000)
