diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7f6e015 --- /dev/null +++ b/.gitignore @@ -0,0 +1,87 @@ +ompiled source # +################### +*.com +*.class +*.dll +*.exe +*.o +*.so + +# Packages # +############ +# it's better to unpack these files and commit the raw source +# git has its own built in compression methods +*.7z +*.dmg +*.gz +*.iso +*.jar +*.rar +*.tar +*.zip + +# Logs and databases # +###################### +*.sql +*.sqlite +*.mat +*.edges +*.smat +*.labels +*.graphml +*.gephi +*.png +*.eps +*.jpg + +# OS generated files # +###################### +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +Icon? +ehthumbs.db +Thumbs.db +.settings/ +.cproject +.project +.pydevproject +data/ +exp/ + + +#Latex # +############## +*.acn +*.acr +*.alg +*.aux +*.bbl +*.blg +*.dvi +*.fdb_latexmk +*.glg +*.glo +*.gls +*.idx +*.ilg +*.ind +*.ist +*.lof +*.log +*.lot +*.maf +*.mtc +*.mtc0 +*.nav +*.nlo +*.out +*.pdfsync +*.ps +*.snm +*.synctex.gz +*.toc +*.vrb +*.xdy diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..4cd82cd --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,23 @@ +License +------- +**Parallel Maximum Clique (PMC) Library**, +Copyright (C) 2012-2013: Ryan A. Rossi, All rights reserved. + +>This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +>This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +>You should have received a copy of the GNU General Public License +along with this program. If not, see . + +If used, please cite the following manuscript: + + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. 
Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv 2013 diff --git a/Makefile b/Makefile new file mode 100755 index 0000000..ea62e70 --- /dev/null +++ b/Makefile @@ -0,0 +1,39 @@ +# +# Makefile for PMC +# +# Ryan A. Rossi +# Copyright, 2012-2013 +# + +.KEEP_STATE: + +all: pmc + +OPTFLAGS = -O3 +CFLAGS = $(OPTFLAGS) +CXX = g++ +H_FILES = pmc.h + +.cpp.o: + $(CXX) $(CFLAGS) -c $< + +IO_SRC = pmc_utils.cpp \ + pmc_graph.cpp \ + pmc_clique_utils.cpp + +PMC_SRC = pmc_heu.cpp \ + pmc_maxclique.cpp \ + pmcx_maxclique.cpp \ + pmcx_maxclique_basic.cpp + +BOUND_LIB_SRC = pmc_cores.cpp + +PMC_MAIN = pmc_driver.cpp + +OBJ_PMC = $(PMC_MAIN:%.cpp=%.o) $(IO_SRC) $(PMC_SRC) $(BOUND_LIB_SRC) +$(OBJ_PMC): $(H_FILES) Makefile +pmc: $(OBJ_PMC) $(H_FILES) + $(CXX) $(CFLAGS) -o pmc $(OBJ_PMC) -fopenmp + +clean: + rm -rf *.o pmc \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5558601 --- /dev/null +++ b/README.md @@ -0,0 +1,189 @@ +Parallel Maximum Clique (PMC) Library +===================================== + +In short, a parameterized high performance library for computing maximum cliques in large sparse graphs. + +Finding maximum cliques, k-cliques, and temporal strong components are in general NP-hard. +Yet, these can be computed fast in most social and information networks. +The PMC library is designed to be fast for solving these problems. +Algorithms in the PMC library are easily adaptable for use with a variety of orderings, heuristic strategies, and bounds. + +* **Maximum clique:** Given a simple undirected graph G and a number k, output the clique of largest size. +* **K-clique:** In k-clique, the problem is to find a clique of size k if one exists. +* **Largest temporal-scc:** Given a temporal graph G, a temporal strong component is a set of vertices where all temporal paths exist between the vertices in that set. 
The Largest TSCC problem is to find the largest among all the temporal strong components. + + + +Features +-------- +0. General framework for parallel maximum clique algorithms +1. Optimized to be fast for large sparse graphs + + Algorithms tested on networks of 1.8 billion edges +2. Set of fast heuristics shown to give accurate approximations +3. Algorithms for computing Temporal Strongly Connected Components (TSCC) of large dynamic networks +4. Parameterized for computing k-cliques as fast as possible +5. Includes a variety of tight linear time bounds for the maximum clique problem +6. Ordering of vertices for each algorithm can be selected at runtime +7. Dynamically reduces the graph representation periodically as vertices are pruned or searched + + Lowers memory-requirements for massive graphs, increases speed, and has caching benefits + + +Synopsis +--------- + +### Setup +First, you'll need to compile the parallel maximum clique library. + + $ cd path/to/pmc/ + $ make + +Afterwards, the following should work: + + # compute maximum clique using the full algorithm `-a 0` + ./pmc -f data/socfb-Texas84.mtx -a 0 + + +*PMC* has been tested on Ubuntu linux (10.10 tested) and Mac OSX (Lion tested) with gcc-mp-4.7 and gcc-mp-4.5.4 + +Please let me know if you run into any issues. + + + +### Input file format ++ Matrix Market Coordinate Format (symmetric) +For details see: + + %%MatrixMarket matrix coordinate pattern symmetric + 4 4 6 + 2 1 + 3 1 + 3 2 + 4 1 + 4 2 + 4 3 + + ++ Edge list (symmetric and unweighted): + Codes for transforming the graph into the correct format are provided in the experiments directory. + + +Overview +--------- + +The parallel maximum clique algorithms use tight bounds that are fast to compute. +A few of those are listed below. + +* K-cores +* Degree +* Neighborhood cores +* Greedy coloring + +All bounds are dynamically updated. + +Examples of the three main maximum clique algorithms are given below. +Each essentially builds on the other. 
+ + # uses the four basic k-core pruning steps + ./pmc -f ../pmc/data/output/socfb-Stanford3.mtx -a 2 + + # k-core pruning and greedy coloring + ./pmc -f ../pmc/data/output/socfb-Stanford3.mtx -a 1 + + # neighborhood core pruning (and ordering for greedy coloring) + ./pmc -f ../pmc/data/output/socfb-Stanford3.mtx -a 0 + + + + + +### Dynamic graph reduction + +The reduction wait parameter `-r` below is set to be 1 second (default = 4 seconds). + + ./pmc -f data/sanr200-0-9.mtx -a 0 -t 2 -r 1 + +In some cases, it may make sense to turn off the explicit graph reduction. +This is done by setting the reduction wait time '-r' to be very large. + + # Set the reduction wait parameter + ./pmc -f data/socfb-Stanford3.mtx -a 0 -t 2 -r 999 + + + + + + +### Orderings + +The PMC algorithms are easily adapted to use various ordering strategies. +To prescribe a vertex ordering, use the -o option with one of the following: ++ `deg` ++ `kcore` ++ `dual_deg`     orders vertices by the sum of degrees from neighbors ++ `dual_kcore`  orders vertices by the sum of core numbers from neighbors ++ `kcore_deg`    vertices are ordered by the weight k(v)d(v) ++ `rand`             randomized ordering of vertices + + + +##### Direction of ordering + +Vertices are searched by default in increasing order, to search vertices in decreasing order, use the `d` option: + + ./pmc -f data/p-hat700-2.mtx -a 0 -d + + + + +### Heuristic +The fast heuristic may also be customized to use various greedy selection strategies. +This is done by using `-h` with one of the following: + ++ `deg` ++ `kcore` ++ `kcore_deg`    select vertex that maximizes k(v)d(v) ++ `rand`             randomly select vertices + + +#### Terminate after applying the heuristic +Approximate the maximum clique using _ONLY_ the heuristic by not setting the exact algorithm via the `-a [num]` option. +For example: + + ./pmc -f data/sanr200-0-9.mtx -h deg + +#### Turning the heuristic off + + # heuristic is turned off by setting `-h 0`. 
+ ./pmc -f data/tscc_enron-only.mtx -h 0 -a 0 + + + +### K-clique + +The parallel maximum clique algorithms have also been parameterized to find cliques of size k. +This routine is useful for many tasks in network analysis such as mining graphs and community detection. + + # Computes a clique of size 50 from the Stanford facebook network + ./pmc -f data/socfb-Stanford3.mtx -a 0 -k 50 + + +using `-o rand` to find potentially different cliques of a certain size + + # Computes a clique of size 36 from sanr200-0-9 + ./pmc -f data/sanr200-0-9.mtx -a 0 -k 36 -o rand + + + +Terms and conditions +-------------------- +Please feel free to use these codes. We only ask that you cite: + + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa Patwary, + A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs and Temporal + Strong Components, arXiv preprint 1302.6256, 2013. + +_These codes are research prototypes and may not work for you. No promises. But do email if you run into problems._ + + +Copyright 2011-2013, Ryan A. Rossi. All rights reserved. + \ No newline at end of file diff --git a/pmc.h b/pmc.h new file mode 100755 index 0000000..4ecdb26 --- /dev/null +++ b/pmc.h @@ -0,0 +1,32 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. 
+ ============================================================================ + */ + +#ifndef __PMC_H__ +#define __PMC_H__ + +#include "pmc_headers.h" +#include "pmc_input.h" +#include "pmc_utils.h" + +#include "pmc_heu.h" +#include "pmc_maxclique.h" +#include "pmcx_maxclique.h" +#include "pmcx_maxclique_basic.h" + +#endif diff --git a/pmc_clique_utils.cpp b/pmc_clique_utils.cpp new file mode 100644 index 0000000..08d8218 --- /dev/null +++ b/pmc_clique_utils.cpp @@ -0,0 +1,201 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. 
+ ============================================================================ + */ + +#include "pmc_graph.h" +#include + +using namespace std; +using namespace pmc; + +int pmc_graph::initial_pruning(pmc_graph& G, int* &pruned, int lb) { + int lb_idx = 0; + for (int i = G.num_vertices()-1; i >= 0; i--) { + if (kcore[kcore_order[i]] == lb) lb_idx = i; + if (kcore[kcore_order[i]] <= lb) pruned[kcore_order[i]] = 1; + } + + double sec = get_time(); + cout << "[pmc: initial k-core pruning] before pruning: |V| = " << G.num_vertices(); + cout << ", |E| = " << G.num_edges() <= 0; i--) { + if (kcore[kcore_order[i]] == lb) lb_idx = i; + if (kcore[kcore_order[i]] <= lb) { + pruned[kcore_order[i]] = 1; + for (long long j = vertices[kcore_order[i]]; j < vertices[kcore_order[i] + 1]; j++) { + adj[kcore_order[i]][edges[j]] = false; + adj[edges[j]][kcore_order[i]] = false; + } + } + } + + double sec = get_time(); + cout << "[pmc: initial k-core pruning] before pruning: |V| = " << G.num_vertices() << ", |E| = " << G.num_edges() < &V, pmc_graph &G, + int &lb_idx, int &lb, string vertex_ordering, bool decr_order) { + + srand (time(NULL)); + int u = 0, val = 0; + for (int k = lb_idx; k < G.num_vertices(); k++) { + if (degree[kcore_order[k]] >= lb - 1) { + u = kcore_order[k]; + + if (vertex_ordering == "deg") + val = vertices[u + 1] - vertices[u]; + else if (vertex_ordering == "kcore") + val = kcore[u]; + else if (vertex_ordering == "kcore_deg") + val = degree[u] * kcore[u]; + else if (vertex_ordering == "rand") + val = rand() % vertices.size(); + // neighbor degrees + else if (vertex_ordering == "dual_deg") { + val = 0; + for (long long j = vertices[u]; j < vertices[u + 1]; j++) { + val = val + G.vertex_degree(edges[j]); + } + } + // neighbor degrees + else if (vertex_ordering == "dual_kcore") { + val = 0; + for (long long j = vertices[u]; j < vertices[u + 1]; j++) { + val = val + kcore[edges[j]]; + } + } + else val = vertices[u + 1] - vertices[u]; + V.push_back(Vertex(u,val)); + } + 
} + if (decr_order) + std::sort(V.begin(), V.end(), decr_bound); + else + std::sort(V.begin(), V.end(), incr_bound); +} + + +/** + * Reduce the graph by removing the pruned vertices + * + Systematically speeds algorithm up by reducing the neighbors as more vertices are searched + * + * The algorithm below is for parallel maximum clique finders and has the following features: + * + Thread-safe, since local copy of vertices/edges are passed in.. + * + Pruned is a shared variable, but it is safe, since only reads/writes can occur, no deletion + */ +void pmc_graph::reduce_graph( + vector& vs, + vector& es, + int* &pruned, + pmc_graph& G, + int id, + int& mc) { + + int num_vs = vs.size(); + + vector V(num_vs,0); + vector E; + E.reserve(es.size()); + + int start = 0; + for (int i = 0; i < num_vs - 1; i++) { + start = E.size(); + if (!pruned[i]) { //skip these V_local... + for (long long j = vs[i]; j < vs[i + 1]; j++ ) { + if (!pruned[es[j]]) + E.push_back(es[j]); + } + } + V[i] = start; + V[i + 1] = E.size(); + } + vs = V; + es = E; + + // compute k-cores and share bounds: ensure operation completed by single process + #pragma omp single nowait + { + cout << ">>> [pmc: thread " << omp_get_thread_num() + 1 << "]" < &C_max, double &sec) { + cout << "*** [pmc: thread " << omp_get_thread_num() + 1; + cout << "] current max clique = " << C_max.size(); + cout << ", time = " << get_time() - sec << " sec" < &C_max, double sec, double time_limit, bool &time_expired_msg) { + if ((get_time() - sec) > time_limit) { + if (time_expired_msg) { + cout << "\n### Time limit expired, terminating search. 
###" <& V, + vector& E, + int* &pruned) { + + long long n, d, i, j, start, num, md; + long long v, u, w, du, pu, pw, md_end; + n = vertices.size(); + + vector pos_tmp(n); + vector core_tmp(n); + vector order_tmp(n); + + md = 0; + for(v=1; v md) md = core_tmp[v]; + } + + md_end = md+1; + vector < int > bin(md_end,0); + + for (v=1; v < n; v++) bin[core_tmp[v]]++; + + start = 1; + for (d=0; d < md_end; d++) { + num = bin[d]; + bin[d] = start; + start = start + num; + } + + for (v=1; v 1; d--) bin[d] = bin[d-1]; + bin[0] = 1; + + for (i = 1; i < n; i++) { + v=order_tmp[i]; + for (j = V[v-1]; j < V[v]; j++) { + u = E[j] + 1; + if (core_tmp[u] > core_tmp[v]) { + du = core_tmp[u]; pu = pos_tmp[u]; + pw = bin[du]; w = order_tmp[pw]; + if (u != w) { + pos_tmp[u] = pw; order_tmp[pu] = w; + pos_tmp[w] = pu; order_tmp[pw] = u; + } + bin[du]++; core_tmp[u]--; + } + } + } + + for (v=0; v pos(n); + if (kcore_order.size() > 0) { + vector tmp(n,0); + kcore = tmp; + kcore_order = tmp; + } + else { + kcore_order.resize(n); + kcore.resize(n); + } + + md = 0; + for (v=1; v md) md = kcore[v]; + } + + md_end = md+1; + vector < int > bin(md_end,0); + + for (v=1; v < n; v++) bin[kcore[v]]++; + + start = 1; + for (d=0; d < md_end; d++) { + num = bin[d]; + bin[d] = start; + start = start + num; + } + + // bucket sort + for (v=1; v 1; d--) bin[d] = bin[d-1]; + bin[0] = 1; + + // kcores + for (i=1; i kcore[v]) { + du = kcore[u]; pu = pos[u]; + pw = bin[du]; w = kcore_order[pw]; + if (u != w) { + pos[u] = pw; kcore_order[pu] = w; + pos[w] = pu; kcore_order[pw] = u; + } + bin[du]++; kcore[u]--; + } + } + } + + for (v = 0; v < n-1; v++) { + kcore[v] = kcore[v+1] + 1; // K + 1 + kcore_order[v] = kcore_order[v+1]-1; + } + max_core = kcore[kcore_order[num_vertices()-1]] - 1; + + bin.clear(); + pos.clear(); +} diff --git a/pmc_driver.cpp b/pmc_driver.cpp new file mode 100644 index 0000000..43de714 --- /dev/null +++ b/pmc_driver.cpp @@ -0,0 +1,119 @@ +/** + 
============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. + ============================================================================ + */ + +#include "pmc.h" + +using namespace std; +using namespace pmc; + +int main(int argc, char *argv[]) { + + //! parse command args + input in(argc, argv); + if (in.help) { + usage(argv[0]); + return 0; + } + + //! read graph + pmc_graph G(in.graph_stats,in.graph); + if (in.graph_stats) { G.bound_stats(in.algorithm, in.lb, G); } + + //! ensure wait time is greater than the time to recompute the graph data structures + if (G.num_edges() > 1000000000 && in.remove_time < 120) in.remove_time = 120; + else if (G.num_edges() > 250000000 && in.remove_time < 10) in.remove_time = 10; + cout << "explicit reduce is set to " << in.remove_time << " seconds" < C; + if (in.lb == 0 && in.heu_strat != "0") { // skip if given as input + pmc_heu maxclique(G,in); + in.lb = maxclique.search(G, C); + cout << "Heuristic found clique of size " << in.lb; + cout << " in " << get_time() - seconds << " seconds" <= 0) { + switch(in.algorithm) { + case 0: { + //! 
k-core pruning, neigh-core pruning/ordering, dynamic coloring bounds/sort + if (G.num_vertices() < in.adj_limit) { + G.create_adj(); + pmcx_maxclique finder(G,in); + finder.search_dense(G,C); + break; + } + else { + pmcx_maxclique finder(G,in); + finder.search(G,C); + break; + } + } + case 1: { + //! k-core pruning, dynamic coloring bounds/sort + if (G.num_vertices() < in.adj_limit) { + G.create_adj(); + pmcx_maxclique_basic finder(G,in); + finder.search_dense(G,C); + break; + } + else { + pmcx_maxclique_basic finder(G,in); + finder.search(G,C); + break; + } + } + case 2: { + //! simple k-core pruning (four new pruning steps) + pmc_maxclique finder(G,in); + finder.search(G,C); + break; + } + default: + cout << "algorithm " << in.algorithm << " not found." < > vert_list; + int v = 0, u = 0, num_es = 0, self_edges = 0; + + ifstream in_check (filename.c_str()); + if (!in_check) { cout << filename << "File not found!" <> v >> u; + if (v == 0 || u == 0) { + fix_start_idx = false; + break; + } + } + } + ifstream in (filename.c_str()); + if (!in) { cout << filename << "File not found!" 
<> v >> u; + + if (fix_start_idx) { + v--; + u--; + } + if (v == u) self_edges++; + else { + vert_list[v].push_back(u); + vert_list[u].push_back(v); + } + } + } + vertices.push_back(edges.size()); + for (int i=0; i < vert_list.size(); i++) { + edges.insert(edges.end(),vert_list[i].begin(),vert_list[i].end()); + vertices.push_back(edges.size()); + } + vert_list.clear(); + vertex_degrees(); + cout << "self-loops: " << self_edges < > v_map; + map > valueList; + int col=0, row=0, ridx=0, cidx=0; + int entry_counter = 0, num_of_entries = 0; + double value; + + ifstream in (filename.c_str()); + if(!in) { + cout<0&&line[0]=='%') getline(in,line); + in2.str(line); + in2 >> row >> col >> num_of_entries; + + if(row!=col) { + cout<<"* ERROR: This is not a square matrix."<> ridx >> cidx >> value; + ridx--; + cidx--; + + if (ridx < 0 || ridx >= row) cout << "sym-mtx error: " << ridx << " row " << row << endl; + if (cidx < 0 || cidx >= col) cout << "sym-mtx error: " << cidx << " col " << col << endl; + if (ridx == cidx) continue; + + if (ridx > cidx) { + if (b_getValue) { + if(value > connStrength) { + v_map[ridx].push_back(cidx); + v_map[cidx].push_back(ridx); + if (is_gstats) { + e_v.push_back(ridx); + e_u.push_back(cidx); + } + } + } else { + v_map[ridx].push_back(cidx); + v_map[cidx].push_back(ridx); + if (is_gstats) { + e_v.push_back(ridx); + e_u.push_back(cidx); + } + } + + if (b_getValue && value > connStrength) { + valueList[ridx].push_back(value); + valueList[cidx].push_back(value); + } + } else { + cout << "* WARNING: Found a nonzero in the upper triangular. 
"; + break; + } + } + } + vertices.push_back(edges.size()); + for (int i=0;i < row; i++) { + edges.insert(edges.end(),v_map[i].begin(),v_map[i].end()); + vertices.push_back(edges.size()); + } + v_map.clear(); + valueList.clear(); + vertex_degrees(); +} + +void pmc_graph::read_metis(const string& filename) { return; }; + +void pmc_graph::create_adj() { + double sec = get_time(); + + int size = num_vertices(); + adj = new bool*[size]; + for (int i = 0; i < size; i++) { + adj[i] = new bool[size]; + memset(adj[i], 0, size * sizeof(bool)); + } + + for (int i = 0; i < num_vertices(); i++) { + for (long long j = vertices[i]; j < vertices[i + 1]; j++ ) + adj[i][edges[j]] = true; + } + cout << "Created adjacency matrix in " << get_time() - sec << " seconds" < 0) { + if (max_degree < degree[v]) max_degree = degree[v]; + p++; + } + } + avg_degree = (double)edges.size() / p; + return; +} + + +void pmc_graph::update_degrees(int* &pruned, int& mc) { + max_degree = -1; + min_degree = std::numeric_limits::max(); + int p = 0; + for (long long v=0; v < num_vertices(); v++) { + degree[v] = vertices[v+1] - vertices[v]; + if (degree[v] < mc) { + if (!pruned[v]) pruned[v] = 1; + p++; + } + else { + if (max_degree < degree[v]) max_degree = degree[v]; + if (degree[v] < min_degree) min_degree = degree[v]; + } + } + avg_degree = (double)edges.size() / p; + cout << ", pruned: " << p << endl; +} + + +void pmc_graph::update_kcores(int* &pruned) { + + long long n, d, i, j, start, num, md; + long long v, u, w, du, pu, pw, md_end; + n = vertices.size(); + kcore.resize(n); + fill(kcore.begin(), kcore.end(), 0); + vector pos_tmp(n); + vector order_tmp(n); + + md = 0; + for(v=1; v md) md = kcore[v]; + } + } + + md_end = md+1; + vector < int > bin(md_end,0); + + for (v=1; v < n; v++) bin[kcore[v]]++; + + start = 1; + for (d=0; d < md_end; d++) { + num = bin[d]; + bin[d] = start; + start = start + num; + } + + for (v=1; v 1; d--) bin[d] = bin[d-1]; + bin[0] = 1; + + for (i = 1; i < n; i++) { + 
v=order_tmp[i]; + if (!pruned[v-1]) { + for (j = vertices[v-1]; j < vertices[v]; j++) { + if (!pruned[edges[j]]) { + u = edges[j] + 1; + if (kcore[u] > kcore[v]) { + du = kcore[u]; pu = pos_tmp[u]; + pw = bin[du]; w = order_tmp[pw]; + if (u != w) { + pos_tmp[u] = pw; order_tmp[pu] = w; + pos_tmp[w] = pu; order_tmp[pw] = u; + } + bin[du]++; kcore[u]--; + } + } + } + } + } + + max_core = 0; + for (v=0; v max_core) max_core = kcore[v]; + } + else kcore[v] = 0; + } + cout << "[pmc: updated cores] K: " << max_core < V(vertices.size(),0); + vector E; + E.reserve(edges.size()); + + int start = 0; + for (int i = 0; i < num_vertices(); i++) { + start = E.size(); + if (!pruned[i]) { + for (long long j = vertices[i]; j < vertices[i + 1]; j++ ) { + if (!pruned[edges[j]]) + E.push_back(edges[j]); + } + } + V[i] = start; + V[i + 1] = E.size(); + } + vertices = V; + edges = E; +} + + +void pmc_graph::reduce_graph( + vector& vs, + vector& es, + int* &pruned, + int id, + int& mc) { + + int num_vs = vs.size(); + + vector V(num_vs,0); + vector E; + E.reserve(es.size()); + + int start = 0; + for (int i = 0; i < num_vs - 1; i++) { + start = E.size(); + if (!pruned[i]) { //skip these V_local... 
+ for (long long j = vs[i]; j < vs[i + 1]; j++ ) { + if (!pruned[es[j]]) + E.push_back(es[j]); + } + } + V[i] = start; + V[i + 1] = E.size(); + } + vs = V; + es = E; +} + + +void pmc_graph::bound_stats(int alg, int lb, pmc_graph& G) { + cout << "graph: " << fn <& bound, vector& order) { + long long n, d, start, num, md; + long long v, md_end; + + n = bound.size(); + order.reserve(n); + vector < long long > pos(n); + + md = 0; + for(v=1; v md) md = bound[v]; + + md_end = md+1; + vector < long long > bin(md_end,0); + + for (v=1; v < n; v++) bin[bound[v]]++; + + start = 1; + for (d=0; d < md_end; d++) { + num = bin[d]; + bin[d] = start; + start = start + num; + } + + for (v=1; v 1; d--) bin[d] = bin[d-1]; + bin[0] = 1; + + for (v=0; v tmp_edges; + tmp_edges.reserve(edges.size()); + + for (v = 0; v < num_vertices(); v++) { + + n = vertices[v+1] - vertices[v] + 1; + vector vert(n); + vector pos(n); + vector deg(n); + + md = 0; + for(u=1; u md) + md = deg[u]; + } + + md_end = md+1; + vector < int > bin(md_end,0); + + for (u=1; u < n; u++) bin[deg[u]]++; + + start = 1; + for (d=0; d < md_end; d++) { + num = bin[d]; + bin[d] = start; + start = start + num; + } + + for (u=1; u edges; + vector vertices; + vector degree; + int min_degree; + int max_degree; + int avg_degree; + bool is_gstats; + string fn; + bool** adj; + + // constructor + pmc_graph(const string& filename); + pmc_graph(bool graph_stats, const string& filename); + pmc_graph(const string& filename, bool make_adj); + pmc_graph(vector vs, vector es) { + edges = es; + vertices = vs; + vertex_degrees(); + } + // destructor + ~pmc_graph(); + + void read_graph(const string& filename); + void create_adj(); + void reduce_graph(int* &pruned); + void reduce_graph( + vector& vs, + vector& es, + int* &pruned, + int id, + int& mc); + + int num_vertices() { return vertices.size() - 1; } + int num_edges() { return edges.size()/2; } + vector * get_vertices(){ return &vertices; } + vector* get_edges(){ return &edges; } + vector* 
get_degree(){ return °ree; } + vector get_edges_array() { return edges; } + vector get_vertices_array() { return vertices; }; + vector e_v, e_u, eid; + + int vertex_degree(int v) { return vertices[v] - vertices[v+1]; } + long long first_neigh(int v) { return vertices[v]; } + long long last_neigh(int v) { return vertices[v+1]; } + + void sum_vertex_degrees(); + void vertex_degrees(); + void update_degrees(); + void update_degrees(bool flag); + void update_degrees(int* &pruned, int& mc); + double density() { return (double)num_edges() / (num_vertices() * (num_vertices() - 1.0) / 2.0); } + int get_max_degree() { return max_degree; } + int get_min_degree() { return min_degree; } + double get_avg_degree() { return avg_degree; } + + void initialize(); + string get_file_extension(const string& filename); + void basic_stats(double sec); + void bound_stats(int alg, int lb, pmc_graph& G); + + // vertex sorter + void compute_ordering(vector& bound, vector& order); + void compute_ordering(string degree, vector& order); + // edge sorters + void degree_bucket_sort(); + void degree_bucket_sort(bool desc); + + int max_core; + vector kcore; + vector kcore_order; + vector* get_kcores() { return &kcore; } + vector* get_kcore_ordering() { return &kcore_order; } + int get_max_core() { return max_core; } + void update_kcores(int* &pruned); + + void compute_cores(); + void induced_cores_ordering( + vector& V, + vector& E, + int* &pruned); + + // clique utils + int initial_pruning(pmc_graph& G, int* &pruned, int lb); + int initial_pruning(pmc_graph& G, int* &pruned, int lb, bool** &adj); + void order_vertices(vector &V, pmc_graph &G, + int &lb_idx, int &lb, string vertex_ordering, bool decr_order); + + void print_info(vector &C_max, double &sec); + void print_break(); + bool time_left(vector &C_max, double sec, + double time_limit, bool &time_expired_msg); + void graph_stats(pmc_graph& G, int& mc, int id, double &sec); + + void reduce_graph( + vector& vs, + vector& es, + int* &pruned, + 
pmc_graph& G, + int id, + int& mc); + + bool clique_test(pmc_graph& G, vector C); + }; + +} +#endif diff --git a/pmc_headers.h b/pmc_headers.h new file mode 100644 index 0000000..7f328d2 --- /dev/null +++ b/pmc_headers.h @@ -0,0 +1,41 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. + ============================================================================ + */ + +#ifndef PMC_HEADERS_H_ +#define PMC_HEADERS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +#ifndef LINE_LENGTH +#define LINE_LENGTH 256 +#endif +#define NANOSECOND 1000000000 + +#endif diff --git a/pmc_heu.cpp b/pmc_heu.cpp new file mode 100644 index 0000000..802c2bb --- /dev/null +++ b/pmc_heu.cpp @@ -0,0 +1,187 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. 
Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. + ============================================================================ + */ + +#include "pmc_heu.h" + +using namespace pmc; +using namespace std; + + +void pmc_heu::branch(vector& P, int sz, + int& mc, vector& C, vector& ind) { + + if (P.size() > 0) { + + int u = P.back().get_id(); + P.pop_back(); + + for (long long j = (*V)[u]; j < (*V)[u + 1]; j++) ind[(*E)[j]] = 1; + + vector R; + R.reserve(P.size()); + for (int i = 0; i < P.size(); i++) + if (ind[P[i].get_id()]) + if ((*K)[P[i].get_id()] > mc) + R.push_back(P[i]); + + for (long long j = (*V)[u]; j < (*V)[u + 1]; j++) ind[(*E)[j]] = 0; + + int mc_prev = mc; + branch(R, sz + 1, mc, C, ind); + + if (mc > mc_prev) C.push_back(u); + + R.clear(); P.clear(); + } + else if (sz > mc) + mc = sz; + return; +} + +int pmc_heu::search_bounds(pmc_graph& G, + vector& C_max) { + + V = G.get_vertices(); + E = G.get_edges(); + degree = G.get_degree(); + vector C, X; + C.reserve(ub); + C_max.reserve(ub); + vector P, T; + P.reserve(G.get_max_degree()+1); + T.reserve(G.get_max_degree()+1); + vector ind(G.num_vertices(),0); + + bool found_ub = false; + int mc = 0, mc_prev, mc_cur, i, v, k, lb_idx = 0; + + #pragma omp parallel for schedule(dynamic) \ + shared(G, X, mc, C_max, lb_idx) private(i, v, P, mc_prev, mc_cur, C, k) firstprivate(ind) + for (i = G.num_vertices()-1; i >= 0; --i) { + if (found_ub) continue; + + v = (*order)[i]; + mc_prev = mc_cur = mc; + + if ((*K)[v] > mc) { + for (long long j = (*V)[v]; j < (*V)[v + 1]; j++) + if ((*K)[(*E)[j]] > mc) + P.push_back( Vertex((*E)[j], compute_heuristic((*E)[j])) ); + + + if (P.size() > mc_cur) { + std::sort(P.begin(), P.end(), incr_heur); + branch(P, 1 , mc_cur, C, ind); + + if (mc_cur > mc_prev) { + if (mc < mc_cur) { + #pragma omp critical + if (mc < mc_cur) { + mc = 
mc_cur; + C.push_back(v); + C_max = C; + if (mc >= ub) found_ub = true; + print_info(C_max); + } + } + } + } + C = X; P = T; + } + } + cout << "[pmc heuristic]\t mc = " << mc <& C_max, int lb) { + + vector C, X; + C.reserve(ub); + C_max.reserve(ub); + vector P, T; + P.reserve(G.get_max_degree()+1); + T.reserve(G.get_max_degree()+1); + vector ind(G.num_vertices(),0); + + int mc = lb, mc_prev, mc_cur, i; + int lb_idx = 0, v = 0; + for (i = G.num_vertices()-1; i >= 0; i--) { + v = (*order)[i]; + if ((*K)[v] == lb) lb_idx = i; + } + + #pragma omp parallel for schedule(dynamic) \ + shared(G, X, mc, C_max) private(i, v, P, mc_prev, mc_cur, C) firstprivate(ind) + for (i = lb_idx; i <= G.num_vertices()-1; i++) { + + v = (*order)[i]; + mc_prev = mc_cur = mc; + + if ((*K)[v] > mc_cur) { + for (long long j = (*V)[v]; j < (*V)[v + 1]; j++) + if ((*K)[(*E)[j]] > mc_cur) + P.push_back( Vertex((*E)[j], compute_heuristic((*E)[j])) ); + + if (P.size() > mc_cur) { + std::sort(P.begin(), P.end(), incr_heur); + branch(P, 1 , mc_cur, C, ind); + + if (mc_cur > mc_prev) { + if (mc < mc_cur) { + #pragma omp critical + if (mc < mc_cur) { + mc = mc_cur; + C.push_back(v); + C_max = C; + print_info(C_max); + } + } + } + } + } + C = X; P = T; + } + C.clear(); + cout << "[search_cores]\t mc = " << mc <& C_max) { + return search_bounds(G, C_max); +} + + +void pmc_heu::print_info(vector C_max) { + cout << "*** [pmc heuristic: thread " << omp_get_thread_num() + 1; + cout << "] current max clique = " << C_max.size(); + cout << ", time = " << get_time() - sec << " sec" < + +namespace pmc { + + class pmc_heu { + public: + vector* E; + vector* V; + vector* K; + vector* order; + vector* degree; + double sec; + int ub; + string strat; + + pmc_heu(pmc_graph& G, input& params) { + K = G.get_kcores(); + order = G.get_kcore_ordering(); + ub = params.ub; + strat = params.heu_strat; + initialize(); + } + + pmc_heu(pmc_graph& G, int tmp_ub) { + K = G.get_kcores(); + order = G.get_kcore_ordering(); + ub = 
tmp_ub; + strat = "kcore"; + initialize(); + } + + void initialize() { + sec = get_time(); + srand (time(NULL)); + }; + + int strategy(vector& P); + void set_strategy(string s) { strat = s; } + int compute_heuristic(int v); + + static bool desc_heur(Vertex v, Vertex u) { + return (v.get_bound() > u.get_bound()); + } + + static bool incr_heur(Vertex v, Vertex u) { + return (v.get_bound() < u.get_bound()); + } + + int search(pmc_graph& graph, vector& C_max); + int search_cores(pmc_graph& graph, vector& C_max, int lb); + int search_bounds(pmc_graph& graph, vector& C_max); + + void branch(vector& P, int sz, + int& mc, vector& C, vector& ind); + + void print_info(vector C_max); + }; +}; +#endif diff --git a/pmc_input.h b/pmc_input.h new file mode 100644 index 0000000..636e74b --- /dev/null +++ b/pmc_input.h @@ -0,0 +1,160 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. 
+ ============================================================================ + */ + +#ifndef PMC_INPUT_H_ +#define PMC_INPUT_H_ + +#include "pmc_headers.h" +#include "pmc_utils.h" + +using namespace std; + +class input { + public: + // instance variables + int algorithm; + int threads; + int experiment; + int lb; + int ub; + int param_ub; + int adj_limit; + double time_limit; + double remove_time; + bool graph_stats; + bool verbose; + bool help; + bool MCE; + bool decreasing_order; + string heu_strat; + string format; + string graph; + string output; + string edge_sorter; + string vertex_search_order; + + input(int argc, char *argv[]) { + // default values + algorithm = -1; + threads = 1; + experiment = 0; + lb = 0; + ub = 0; + param_ub = 0; + adj_limit = 10000; + time_limit = 60 * 60; // max time to search + remove_time = 4.0; // time to wait before reducing graph + verbose = false; + graph_stats = false; + help = false; + MCE = false; + decreasing_order = false; + heu_strat = "kcore"; + vertex_search_order = "deg"; + format = "mtx"; + graph = "data/sample.mtx"; + output = ""; + string edge_sorter = ""; + + int opt; + while ((opt=getopt(argc,argv,"i:t:f:u:l:o:e:a:r:w:h:k:dgsv")) != EOF) { + switch (opt) { + case 'a': + algorithm = atoi(optarg); + if (algorithm > 9) MCE = true; + break; + case 't': + threads = atoi(optarg); + break; + case 'f': + graph = optarg; + break; + case 's': + graph_stats = true; + break; + case 'u': + param_ub = atoi(optarg); // find k-clique fast + lb = 2; // skip heuristic + break; + case 'k': + param_ub = atoi(optarg); + lb = param_ub-1; + break; + case 'l': + lb = atoi(optarg); + break; + case 'h': + heu_strat = optarg; + break; + case 'v': + verbose = 1; + break; + case 'w': + time_limit = atof(optarg) * 60; // convert minutes to seconds + break; + case 'r': + remove_time = atof(optarg); + break; + case 'e': + edge_sorter = optarg; + break; + case 'o': + vertex_search_order = optarg; + break; + case 'd': + // direction of which 
vertices are ordered + decreasing_order = true; + break; + case '?': + usage(argv[0]); + break; + default: + usage(argv[0]); + break; + } + } + + // both off, use default alg + if (heu_strat == "0" && algorithm == -1) + algorithm = 0; + + if (threads <= 0) threads = 1; + + if (!fexists(graph.c_str())) { + usage(argv[0]); + exit(-1); + } + + FILE* fin = fopen(graph.c_str(), "r+t"); + if (fin == NULL) { + usage(argv[0]); + exit(-1); + } + fclose(fin); + + cout << "\n\nFile Name ------------------------ " << graph.c_str() << endl; + if (!fexists(graph.c_str()) ) { + cout << "File not found!" << endl; + return; + } + omp_set_num_threads(threads); + } + +}; +#endif diff --git a/pmc_maxclique.cpp b/pmc_maxclique.cpp new file mode 100755 index 0000000..7b5399a --- /dev/null +++ b/pmc_maxclique.cpp @@ -0,0 +1,265 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. 
+ ============================================================================ + */ + +#include "pmc_maxclique.h" + +using namespace std; +using namespace pmc; + +int pmc_maxclique::search(pmc_graph& G, vector& sol) { + + vertices = G.get_vertices(); + edges = G.get_edges(); + degree = G.get_degree(); + int* pruned = new int[G.num_vertices()]; + memset(pruned, 0, G.num_vertices() * sizeof(int)); + int mc = lb, i = 0, u = 0; + + // initial pruning + int lb_idx = G.initial_pruning(G, pruned, lb); + + // set to worst case bound of cores/coloring + vector P, T; + P.reserve(G.get_max_degree()+1); + T.reserve(G.get_max_degree()+1); + + vector C, C_max; + C.reserve(G.get_max_degree()+1); + C_max.reserve(G.get_max_degree()+1); + + // order verts for our search routine + vector V; V.reserve(G.num_vertices()); + G.order_vertices(V,G,lb_idx,lb,vertex_ordering,decr_order); + + vector ind(G.num_vertices(),0); + + #pragma omp parallel for schedule(dynamic) \ + shared(pruned, G, T, V, mc, C_max) firstprivate(ind) private(u, P, C) + for (i = 0; i < (V.size()) - (mc-1); ++i) { + if (G.time_left(C_max,sec,time_limit,time_expired_msg)) { + + u = V[i].get_id(); + if ((*bound)[u] > mc) { + P.push_back(V[i]); + for (long long j = (*vertices)[u]; j < (*vertices)[u + 1]; ++j) + if (!pruned[(*edges)[j]]) + if ((*bound)[(*edges)[j]] > mc) + P.push_back(Vertex((*edges)[j], (*degree)[(*edges)[j]])); + + if (P.size() > mc) { + branch(P, ind, C, C_max, pruned, mc); + } + P = T; + } + pruned[u] = 1; + } + } + + if (pruned) delete[] pruned; + + sol.resize(mc); + for (int i = 0; i < C_max.size(); i++) sol[i] = C_max[i]; + G.print_break(); + return sol.size(); +} + + + + +void pmc_maxclique::branch( + vector &P, + vector& ind, + vector& C, + vector& C_max, + int* &pruned, + int& mc) { + + // stop early if ub is reached + if (not_reached_ub) { + while (P.size() > 0) { + // terminating condition + if (C.size() + P.size() > mc) { + int v = P.back().get_id(); C.push_back(v); + + vector R; 
R.reserve(P.size()); + for (long long j = (*vertices)[v]; j < (*vertices)[v + 1]; j++) ind[(*edges)[j]] = 1; + + // intersection of N(v) and P - {v} + for (int k = 0; k < P.size() - 1; k++) + if (ind[P[k].get_id()]) + if (!pruned[P[k].get_id()]) + if ((*bound)[P[k].get_id()] > mc) + R.push_back(P[k]); + + for (long long j = (*vertices)[v]; j < (*vertices)[v + 1]; j++) ind[(*edges)[j]] = 0; + + if (R.size() > 0) { + branch(R, ind, C, C_max, pruned, mc); + } + else if (C.size() > mc) { + // obtain lock + #pragma omp critical (update_mc) + if (C.size() > mc) { + // ensure updated max is flushed + mc = C.size(); + C_max = C; + print_mc_info(C,sec); + if (mc >= param_ub) { + not_reached_ub = false; + cout << "[pmc: upper bound reached] omega = " << mc <& sol) { + + vertices = G.get_vertices(); + edges = G.get_edges(); + degree = G.get_degree(); + bool** adj = G.adj; + + int* pruned = new int[G.num_vertices()]; + memset(pruned, 0, G.num_vertices() * sizeof(int)); + int mc = lb, i = 0, u = 0; + + // initial pruning + int lb_idx = G.initial_pruning(G, pruned, lb, adj); + + // set to worst case bound of cores + vector P, T; + P.reserve(G.get_max_degree()+1); + T.reserve(G.get_max_degree()+1); + + vector C, C_max; + C.reserve(G.get_max_degree()+1); + C_max.reserve(G.get_max_degree()+1); + + // order verts for our search routine + vector V; V.reserve(G.num_vertices()); + G.order_vertices(V,G,lb_idx,lb,vertex_ordering,decr_order); + + vector ind(G.num_vertices(),0); + + #pragma omp parallel for schedule(dynamic) \ + shared(pruned, G, adj, T, V, mc, C_max) firstprivate(ind) private(u, P, C) + for (i = 0; i < (V.size()) - (mc-1); ++i) { + if (G.time_left(C_max,sec,time_limit,time_expired_msg)) { + + u = V[i].get_id(); + if ((*bound)[u] > mc) { + P.push_back(V[i]); + for (long long j = (*vertices)[u]; j < (*vertices)[u + 1]; ++j) + if (!pruned[(*edges)[j]]) + if ((*bound)[(*edges)[j]] > mc) + P.push_back(Vertex((*edges)[j], (*degree)[(*edges)[j]])); + + if (P.size() > mc) { + 
branch_dense(P, ind, C, C_max, pruned, mc, adj); + } + P = T; + } + pruned[u] = 1; + for (long long j = (*vertices)[u]; j < (*vertices)[u + 1]; j++) { + adj[u][(*edges)[j]] = false; + adj[(*edges)[j]][u] = false; + } + } + } + if (pruned) delete[] pruned; + + sol.resize(mc); + for (int i = 0; i < C_max.size(); i++) sol[i] = C_max[i]; + G.print_break(); + return sol.size(); +} + + + +void pmc_maxclique::branch_dense( + vector &P, + vector& ind, + vector& C, + vector& C_max, + int* &pruned, + int& mc, + bool** &adj) { + + // stop early if ub is reached + if (not_reached_ub) { + while (P.size() > 0) { + // terminating condition + if (C.size() + P.size() > mc) { + int v = P.back().get_id(); C.push_back(v); + vector R; R.reserve(P.size()); + + for (int k = 0; k < P.size() - 1; k++) + // indicates neighbor AND pruned + if (adj[v][P[k].get_id()]) + if ((*bound)[P[k].get_id()] > mc) + R.push_back(P[k]); + + if (R.size() > 0) { + branch_dense(R, ind, C, C_max, pruned, mc, adj); + } + else if (C.size() > mc) { + // obtain lock + #pragma omp critical (update_mc) + if (C.size() > mc) { + // ensure updated max is flushed + mc = C.size(); + C_max = C; + print_mc_info(C,sec); + if (mc >= param_ub) { + not_reached_ub = false; + cout << "[pmc: upper bound reached] omega = " << mc < +#include +#include +#include +#include +#include "pmc_headers.h" +#include "pmc_utils.h" +#include "pmc_graph.h" +#include "pmc_input.h" +#include "pmc_vertex.h" + +using namespace std; + +namespace pmc { + + class pmc_maxclique { + public: + vector* edges; + vector* vertices; + vector* bound; + vector* order; + vector* degree; + int param_ub; + int ub; + int lb; + double time_limit; + double sec; + double wait_time; + bool not_reached_ub; + bool time_expired_msg; + bool decr_order; + + string vertex_ordering; + int edge_ordering; + int style_bounds; + int style_dynamic_bounds; + + int num_threads; + + void initialize() { + vertex_ordering = "kcore"; + edge_ordering = 0; + style_bounds = 0; + 
style_dynamic_bounds = 0; + not_reached_ub = true; + time_expired_msg = true; + decr_order = false; + } + + void setup_bounds(input& params) { + lb = params.lb; + ub = params.ub; + param_ub = params.param_ub; + if (param_ub == 0) + param_ub = ub; + time_limit = params.time_limit; + wait_time = params.remove_time; + sec = get_time(); + + num_threads = params.threads; + } + + + pmc_maxclique(pmc_graph& G, input& params) { + bound = G.get_kcores(); + order = G.get_kcore_ordering(); + setup_bounds(params); + initialize(); + vertex_ordering = params.vertex_search_order; + decr_order = params.decreasing_order; + } + + ~pmc_maxclique() {}; + + int search(pmc_graph& G, vector& sol); + + void branch( + vector &P, + vector& ind, + vector& C, + vector& C_max, + int* &pruned, + int& mc); + + + int search_dense(pmc_graph& G, vector& sol); + + void branch_dense( + vector &P, + vector& ind, + vector& C, + vector& C_max, + int* &pruned, + int& mc, + bool** &adj); + + }; +}; + +#endif diff --git a/pmc_neigh_coloring.h b/pmc_neigh_coloring.h new file mode 100644 index 0000000..f78c102 --- /dev/null +++ b/pmc_neigh_coloring.h @@ -0,0 +1,148 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. 
#ifndef PMC_NEIGH_COLORING_H_
#define PMC_NEIGH_COLORING_H_

#include "pmc_vertex.h"

using namespace std;

namespace pmc {

    // NOTE(review): the template arguments in both signatures below were
    // stripped in transit; reconstructed as vector<long long> (CSR offsets),
    // vector<int> (edges/cliques), vector<short> (scratch marks), and
    // vector<vector<int>> (color classes) -- confirm against upstream.

    // sequential dynamic greedy coloring and sort
    //
    // Greedily colors the candidate set P, then rewrites P in place so that
    // vertices appear in increasing color order with their color stored as
    // the bound. A clique through C can gain at most (number of colors)
    // vertices from P, so any vertex whose color is below
    // min_k = mc - |C| + 1 cannot beat the current best clique mc; those
    // are compacted to the front of P and the last of them gets bound 0,
    // letting the caller's "C.size() + P.back().get_bound() > mc" test
    // prune the whole prefix.
    static void neigh_coloring_bound(
            vector<long long>& vs,          // CSR vertex offset array
            vector<int>& es,                // CSR edge target array
            vector<Vertex> &P,              // candidates; reordered in place
            vector<short>& ind,             // scratch adjacency marks (all 0 on entry/exit)
            vector<int>& C,                 // current clique
            vector<int>& C_max,             // best clique so far (not read here)
            vector< vector<int> >& colors,  // reusable color-class buckets
            int* pruned,                    // pruned flags (not read here)
            int& mc) {                      // current best clique size

        int j = 0, u = 0, k = 1, k_prev = 0;
        int max_k = 1;
        // smallest color class that could still extend C past mc
        int min_k = mc - C.size() + 1;
        colors[1].clear();   colors[2].clear();

        for (int w=0; w < P.size(); w++) {
            u = P[w].get_id();
            k = 1, k_prev = 0;

            // mark N(u) so the conflict test below is O(1) per vertex
            for (long long h = vs[u]; h < vs[u + 1]; h++)  ind[es[h]] = 1;

            // find the smallest color class containing no neighbor of u;
            // restart whenever a conflict bumps k
            while (k > k_prev) {
                k_prev = k;
                for (int i = 0; i < colors[k].size(); i++) {
                    if (ind[colors[k][i]]) {
                        k++;
                        break;
                    }
                }
            }

            // unmark N(u)
            for (long long h = vs[u]; h < vs[u + 1]; h++)  ind[es[h]] = 0;

            if (k > max_k) {
                max_k = k;
                colors[max_k+1].clear();  // keep the next bucket ready
            }

            colors[k].push_back(u);
            // colors below min_k cannot improve mc: compact to front of P
            if (k < min_k) {
                P[j].set_id(u);
                j++;
            }
        }

        // bound 0 on the last hopeless vertex makes the caller stop there
        if (j > 0)  P[j-1].set_bound(0);
        if (min_k <= 0)  min_k = 1;

        // start from min_k and add all verts with a larger bound
        for (k = min_k; k <= max_k; k++)
            for (int w = 0; w < colors[k].size(); w++) {
                P[j].set_id(colors[k][w]);
                P[j].set_bound(k);
                j++;
            }
    }

    // sequential dynamic greedy coloring and sort
    //
    // Dense-matrix variant of neigh_coloring_bound: identical algorithm,
    // but adjacency tests go through the bool matrix adj instead of
    // marking CSR neighborhoods in ind (ind is unused here).
    static void neigh_coloring_dense(
            vector<long long>& vs,
            vector<int>& es,
            vector<Vertex> &P,
            vector<short>& ind,
            vector<int>& C,
            vector<int>& C_max,
            vector< vector<int> >& colors,
            int& mc
            ,bool** &adj
            ) {

        int j = 0, u = 0, k = 1, k_prev = 0;
        int max_k = 1;
        int min_k = mc - C.size() + 1;

        colors[1].clear();   colors[2].clear();

        for (int w=0; w < P.size(); w++) {
            u = P[w].get_id();
            k = 1, k_prev = 0;

            // smallest color class with no neighbor of u, via the matrix
            while (k > k_prev) {
                k_prev = k;
                for (int i = 0; i < colors[k].size(); i++) { //use ind directly, no need to use Verts/edges
                    if (adj[u][colors[k][i]]) {
                        k++;
                        break;
                    }
                }
            }

            if (k > max_k) {
                max_k = k;
                colors[max_k+1].clear();
            }

            colors[k].push_back(u);
            if (k < min_k) {
                P[j].set_id(u);
                j++;
            }
        }

        if (j > 0)  P[j-1].set_bound(0);

        if (min_k <= 0)  min_k = 1;

        // start from min_k and add all verts with a larger bound
        for (k = min_k; k <= max_k; k++)
            for (int w = 0; w < colors[k].size(); w++) {
                P[j].set_id(colors[k][w]);
                P[j].set_bound(k);
                j++;
            }
    }

}
#endif
+ ============================================================================ + */ + +#ifndef PMC_NEIGH_CORES_H_ +#define PMC_NEIGH_CORES_H_ + +#include "pmc_vertex.h" + +using namespace std; + +namespace pmc { + + static void neigh_cores_bound( + vector& vs, + vector& es, + vector &P, + vector& ind, + int& mc) { + + int n = P.size() + 1; + + // lookup table + vector newids_to_actual(n, 0); + vector vert_order(n,0); + vector deg(n,0); + vector pos(n,0); + + // lookup table for neighbors + for (int v = 1; v < n; v++) ind[P[v-1].get_id()] = 1; + + // compute degrees of induced neighborhood + int md = 0, x, u; + for (int v = 1; v < n; v++) { // for each v in P + u = P[v-1].get_id(); + x = 0; + for (long long j=vs[u]; j md) md = deg[v]; + } + + int md_end = md+1; + vector bin(md_end,0); + for (int v = 1; v < n; v++) bin[deg[v]]++; + + int start = 1, num = 0; + for (int d=0; d < md_end; d++) { //for each deg, set bin to be the pos of the first vertex of that degree + num = bin[d]; + bin[d] = start; + start = start + num; + } + + + for (int v=1; v 1; d--) bin[d] = bin[d-1]; + bin[0] = 1; + + + int v_newid, v_actual, u_newid, du, pu, pw, w; + long long j = 0; + for (int i = 1; i < n; i++) { // neighborhood K-cores + v_newid = vert_order[i]; //relabeled id + v_actual = newids_to_actual[v_newid]; // real id + for (j = vs[v_actual]; j 0) { // find common induced neighbors of k + + u_newid = ind[es[j]]; + if (deg[u_newid] > deg[v_newid]) { + du = deg[u_newid]; + pu = pos[u_newid]; + pw = bin[du]; + w = vert_order[pw]; + if (u_newid != w) { + pos[u_newid] = pw; + vert_order[pu] = w; + pos[w] = pu; + vert_order[pw] = u_newid; + } + bin[du]++; deg[u_newid]--; + } + } + } + } + + // reset index + for (int v=1; v 0; --i) { + u = vert_order[i]; + if (deg[u]+1 > mc) { + P[id].set_bound(deg[u]); + P[id].set_id(newids_to_actual[u]); + id++; + } + else prune_vert++; + } + + // remove pruned verts from P + for (int i = 0; i < prune_vert; i++) + P.pop_back(); + } + + + static void 
neigh_cores_tight( + vector& vs, + vector& es, + vector &P, + vector& ind, + int& mc) { + + int n = P.size() + 1; + + // lookup table + vector newids_to_actual(n, 0); + vector vert_order(n,0); + vector deg(n,0); + vector pos(n,0); + + + // lookup table for neighbors + for (int v = 1; v < n; v++) ind[P[v-1].get_id()] = 1; + + // compute degrees of induced neighborhood + int md = 0, x, u; + for (int v = n-1; v >= 1; v--) { + u = P[v-1].get_id(); + x = 0; + for (long long j=vs[u]; j= mc) { + deg[v] = x; + if (deg[v] > md) md = deg[v]; + } + else { + deg[v] = 0; + ind[P[v-1].get_id()] = 0; + } + } + + int md_end = md+1; + vector bin(md_end,0); + for (int v = 1; v < n; v++) bin[deg[v]]++; + + int start = 1, num = 0; + for (int d=0; d < md_end; d++) { //for each deg, set bin to be the pos of the first vertex of that degree + num = bin[d]; + bin[d] = start; + start = start + num; + } + + + for (int v=1; v 0) { + pos[v] = bin[deg[v]]; + + //view this step as relabeling the vertices + vert_order[pos[v]] = v; + ind[P[v-1].get_id()] = v; // set bit for actual vertex id + newids_to_actual[v] = P[v-1].get_id(); + bin[deg[v]]++; + } + else + ind[P[v-1].get_id()] = 0; + } + + for (int d=md; d > 1; d--) bin[d] = bin[d-1]; + bin[0] = 1; + + + int v_newid, v_actual, u_newid, du, pu, pw, w; + long long j = 0; + for (int i = 1; i < n; i++) { // neighborhood K-cores + v_newid = vert_order[i]; + if (deg[v_newid] > 0) { + //relabeled id + v_actual = newids_to_actual[v_newid]; // real id + for (j = vs[v_actual]; j 0) { // find common induced neighbors of k + + u_newid = ind[es[j]]; + if (deg[u_newid] > deg[v_newid]) { + du = deg[u_newid]; + pu = pos[u_newid]; + pw = bin[du]; + w = vert_order[pw]; + if (u_newid != w) { + pos[u_newid] = pw; + vert_order[pu] = w; + pos[w] = pu; + vert_order[pw] = u_newid; + } + bin[du]++; deg[u_newid]--; + } + } + } + } + } + + // reset index + for (int v=1; v 0; --i) { + u = vert_order[i]; + if (deg[u]+1 > mc) { + P[id].set_bound(deg[u]); + 
P[id].set_id(newids_to_actual[u]); + id++; + } + else prune_vert++; + } + + // remove pruned verts from P + for (int i = 0; i < prune_vert; i++) + P.pop_back(); + } +} + + +#endif diff --git a/pmc_utils.cpp b/pmc_utils.cpp new file mode 100755 index 0000000..6af3e53 --- /dev/null +++ b/pmc_utils.cpp @@ -0,0 +1,124 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. + ============================================================================ + */ + +#include "pmc_utils.h" + +using namespace std; + +bool fexists(const char *filename) { + ifstream ifile(filename); + return ifile; +} + +void usage(char *argv0) { + const char *params = + "Usage: %s -a alg -f graphfile -t threads -o ordering -h heu_strat -u upper_bound -l lower_bound -r reduce_wait_time -w time_limit \n" + "\t-a algorithm : Algorithm for solving MAX-CLIQUE: 0 = full, 1 = no neighborhood cores, 2 = only basic k-core pruning steps \n" + "\t-f graph file : Input GRAPH file for computing the Maximum Clique (matrix market format or simple edge list). \n" + "\t-o vertex search ordering : Order in which vertices are searched (default = deg, [kcore, dual_deg, dual_kcore, kcore_deg, rand]) \n" + "\t-d decreasing order : Search vertices in DECREASING order. Note if '-d' is not set, then vertices are searched in increasing order by default. 
\n" + "\t-e neigh/edge ordering : Ordering of neighbors/edges (default = deg, [kcore, dual_deg, dual_kcore, kcore_deg, rand]) \n" + "\t-h heuristic strategy : Strategy for HEURISTIC method (default = kcore, [deg, dual_deg, dual_kcore, rand, 0 = skip heuristic]) \n" + "\t-u upper_bound : UPPER-BOUND on clique size (default = K-cores).\n" + "\t-l lower_bound : LOWER-BOUND on clique size (default = Estimate using the Fast Heuristic). \n" + "\t-t threads : Number of THREADS for the algorithm to use (default = 1). \n" + "\t-r reduce_wait : Number of SECONDS to wait before inducing the graph based on the unpruned vertices (default = 4 seconds). \n" + "\t-w time_limit : Execution TIME LIMIT spent searching for max clique (default = 7 days) \n" + "\t-k clique size : Solve K-CLIQUE problem: find clique of size k if it exists. Parameterized to be fast. \n" + "\t-s stats : Compute BOUNDS and other fast graph stats \n" + "\t-v verbose : Output additional details to the screen. \n" + "\t-? options : Print out this help menu. 
using namespace std;

// wall-clock time in seconds (microsecond resolution)
double get_time() {
    timeval t;
    gettimeofday(&t, NULL);
    return t.tv_sec*1.0 + t.tv_usec/1000000.0;
}

// return this process's "VmSize" line from /proc/self/status
// (empty string on platforms without procfs)
string memory_usage() {
    ostringstream mem;
    ifstream proc("/proc/self/status");
    string s;
    while(getline(proc, s), !proc.fail()) {
        if(s.substr(0, 6) == "VmSize") {
            mem << s;
            return mem.str();
        }
    }
    return mem.str();
}

// print an indentation prefix plus "(<level>) " for nested trace output.
// NOTE(review): the str parameter is accepted but never printed -- kept
// as-is for interface compatibility with existing callers; confirm intent.
void indent(int level, string str) {
    for (int i = 0; i < level; i++)
        cout << " ";
    cout << "(" << level << ") ";
}

// print a clique using 1-based vertex ids (input files are 1-based)
void print_max_clique(vector<int>& C) {
    cout << "Maximum clique: ";
    for(int i = 0; i < C.size(); i++)
        cout << C[i] + 1 << " ";
    cout << endl;
}

// print up to n cliques from the set C
void print_n_maxcliques(set< vector<int> > C, int n) {
    set< vector<int> >::iterator it;
    int mc = 0;
    for( it = C.begin(); it != C.end(); it++) {
        if (mc < n) {
            cout << "Maximum clique: ";
            const vector<int>& clq = (*it);
            for (int j = 0; j < clq.size(); j++)
                cout << clq[j] << " ";
            cout << endl;
            mc++;
        }
        else break;
    }
}

// list the entries of dir into files (skipping "."); returns 0 on
// success or errno if the directory could not be opened
int getdir (string dir, vector<string> &files) {
    DIR *dp;
    struct dirent *dirp;
    if((dp = opendir(dir.c_str())) == NULL) {
        cout << "Error(" << errno << ") opening " << dir << endl;
        return errno;
    }

    while ((dirp = readdir(dp)) != NULL) {
        // BUG FIX: the original wrote `dirp->d_name != "."`, which compares
        // the char* to the address of a string literal -- effectively always
        // true, so "." was never skipped. Compare the contents with strcmp.
        if (strcmp(dirp->d_name, ".") != 0)
            files.push_back(string(dirp->d_name));
    }
    closedir(dp);
    return 0;
}
Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. + ============================================================================ + */ + +#ifndef PMC_UTILS_H_ +#define PMC_UTILS_H_ + +#include +#include +#include +#include +#include "assert.h" +#include +#include +#include +#include +#include +#include "pmc_headers.h" + + +using namespace std; + +bool fexists(const char *filename); +void usage(char *argv0); + +double get_time(); +string memory_usage(); + +void validate(bool condition, const string& msg); + +void indent(int level); +void indent(int level, string str); +void print_max_clique(vector& max_clique_data); +void print_n_maxcliques(set< vector > C, int n); + +int getdir (string dir, vector &files); + +#endif diff --git a/pmc_vertex.h b/pmc_vertex.h new file mode 100644 index 0000000..3fe839c --- /dev/null +++ b/pmc_vertex.h @@ -0,0 +1,52 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. 
+ ============================================================================ + */ + +#ifndef PMC_VERTEX_H_ +#define PMC_VERTEX_H_ + +using namespace std; + +namespace pmc { + class Vertex { + private: + int id, b; + public: + Vertex(int vertex_id, int bound): id(vertex_id), b(bound) {}; + + void set_id(int vid) { id = vid; } + int get_id() { return id; } + + void set_bound(int value) { b = value; } + int get_bound() { return b; } + }; + + static bool decr_bound(Vertex v, Vertex u) { + return (v.get_bound() > u.get_bound()); + } + static bool incr_bound(Vertex v, Vertex u) { + return (v.get_bound() < u.get_bound()); + }; + + static void print_mc_info(vector &C_max, double &sec) { + cout << "*** [pmc: thread " << omp_get_thread_num() + 1; + cout << "] current max clique = " << C_max.size(); + cout << ", time = " << get_time() - sec << " sec" <& sol) { + + vertices = G.get_vertices(); + edges = G.get_edges(); + degree = G.get_degree(); + int* pruned = new int[G.num_vertices()]; + memset(pruned, 0, G.num_vertices() * sizeof(int)); + int mc = lb, i = 0, u = 0; + + // initial pruning + int lb_idx = G.initial_pruning(G, pruned, lb); + + // set to worst case bound of cores/coloring + vector P, T; + P.reserve(G.get_max_degree()+1); + T.reserve(G.get_max_degree()+1); + + vector C, C_max; + C.reserve(G.get_max_degree()+1); + C_max.reserve(G.get_max_degree()+1); + + // init the neigh coloring array + vector< vector > colors(G.get_max_degree()+1); + for (int i = 0; i < G.get_max_degree()+1; i++) colors[i].reserve(G.get_max_degree()+1); + + // order verts for our search routine + vector V; + V.reserve(G.num_vertices()); + G.order_vertices(V,G,lb_idx,lb,vertex_ordering,decr_order); + cout << "|V| = " << V.size() < ind(G.num_vertices(),0); + vector es = G.get_edges_array(); + vector vs = G.get_vertices_array(); + + vector induce_time(num_threads,get_time()); + for (int t = 0; t < num_threads; ++t) induce_time[t] = induce_time[t] + t/4; + + #pragma omp parallel for 
schedule(dynamic) shared(pruned, G, T, V, mc, C_max, induce_time) \ + firstprivate(colors,ind,vs,es) private(u, P, C) + for (i = 0; i < (V.size()) - (mc-1); ++i) { + if (not_reached_ub) { + if (G.time_left(C_max,sec,time_limit,time_expired_msg)) { + + u = V[i].get_id(); + if ((*bound)[u] > mc) { + P.push_back(V[i]); + for (long long j = vs[u]; j < vs[u + 1]; ++j) + if (!pruned[es[j]]) + if ((*bound)[es[j]] > mc) + P.push_back(Vertex(es[j], (*degree)[es[j]])); + + + if (P.size() > mc) { + neigh_cores_bound(vs,es,P,ind,mc); + if (P.size() > mc && P[0].get_bound() >= mc) { + neigh_coloring_bound(vs,es,P,ind,C,C_max,colors,pruned,mc); + if (P.back().get_bound() > mc) { + branch(vs,es,P, ind, C, C_max, colors, pruned, mc); + } + } + } + P = T; + } + pruned[u] = 1; + + // dynamically reduce graph in a thread-safe manner + if ((get_time() - induce_time[omp_get_thread_num()]) > wait_time) { + G.reduce_graph( vs, es, pruned, G, i+lb_idx, mc); + G.graph_stats(G, mc, i+lb_idx, sec); + induce_time[omp_get_thread_num()] = get_time(); + } + } + } + } + + if (pruned) delete[] pruned; + + sol.resize(mc); + for (int i = 0; i < C_max.size(); i++) sol[i] = C_max[i]; + G.print_break(); + return sol.size(); +} + + + + +void pmcx_maxclique::branch( + vector& vs, + vector& es, + vector &P, + vector& ind, + vector& C, + vector& C_max, + vector< vector >& colors, + int* &pruned, + int& mc) { + + // stop early if ub is reached + if (not_reached_ub) { + while (P.size() > 0) { + // terminating condition + if (C.size() + P.back().get_bound() > mc) { + int v = P.back().get_id(); C.push_back(v); + + vector R; R.reserve(P.size()); + for (long long j = vs[v]; j < vs[v + 1]; j++) ind[es[j]] = 1; + + // intersection of N(v) and P - {v} + for (int k = 0; k < P.size() - 1; k++) + if (ind[P[k].get_id()]) + if (!pruned[P[k].get_id()]) + if ((*bound)[P[k].get_id()] > mc) + R.push_back(P[k]); + + for (long long j = vs[v]; j < vs[v + 1]; j++) ind[es[j]] = 0; + + + if (R.size() > 0) { + // color graph 
induced by R and sort for O(1) + neigh_coloring_bound(vs, es, R, ind, C, C_max, colors, pruned, mc); + branch(vs, es, R, ind, C, C_max, colors, pruned, mc); + } + else if (C.size() > mc) { + // obtain lock + #pragma omp critical (update_mc) + if (C.size() > mc) { + // ensure updated max is flushed + mc = C.size(); + C_max = C; + print_mc_info(C,sec); + if (mc >= param_ub) { + not_reached_ub = false; + cout << "[pmc: upper bound reached] omega = " << mc <& sol) { + + vertices = G.get_vertices(); + edges = G.get_edges(); + degree = G.get_degree(); + bool** adj = G.adj; + + int* pruned = new int[G.num_vertices()]; + memset(pruned, 0, G.num_vertices() * sizeof(int)); + int mc = lb, i = 0, u = 0; + + // initial pruning + int lb_idx = G.initial_pruning(G, pruned, lb, adj); + + // set to worst case bound of cores/coloring + vector P, T; + P.reserve(G.get_max_degree()+1); + T.reserve(G.get_max_degree()+1); + + vector C, C_max; + C.reserve(G.get_max_degree()+1); + C_max.reserve(G.get_max_degree()+1); + + // init the neigh coloring array + vector< vector > colors(G.get_max_degree()+1); + for (int i = 0; i < G.get_max_degree()+1; i++) colors[i].reserve(G.get_max_degree()+1); + + // order verts for our search routine + vector V; + V.reserve(G.num_vertices()); + G.order_vertices(V,G,lb_idx,lb,vertex_ordering,decr_order); + cout << "|V| = " << V.size() < ind(G.num_vertices(),0); + vector es = G.get_edges_array(); + vector vs = G.get_vertices_array(); + + vector induce_time(num_threads,get_time()); + for (int t = 0; t < num_threads; ++t) induce_time[t] = induce_time[t] + t/4; + + + #pragma omp parallel for schedule(dynamic) shared(pruned, G, adj, T, V, mc, C_max, induce_time) \ + firstprivate(colors,ind,vs,es) private(u, P, C) + for (i = 0; i < (V.size()) - (mc-1); ++i) { + if (not_reached_ub) { + if (G.time_left(C_max,sec,time_limit,time_expired_msg)) { + + u = V[i].get_id(); + if ((*bound)[u] > mc) { + P.push_back(V[i]); + for (long long j = vs[u]; j < vs[u + 1]; ++j) + if 
(!pruned[es[j]]) + if ((*bound)[es[j]] > mc) + P.push_back(Vertex(es[j], (*degree)[es[j]])); + + if (P.size() > mc) { + // neighborhood core ordering and pruning + neigh_cores_bound(vs,es,P,ind,mc); + if (P.size() > mc && P[0].get_bound() >= mc) { + neigh_coloring_dense(vs,es,P,ind,C,C_max,colors,mc, adj); + if (P.back().get_bound() > mc) { + branch_dense(vs,es,P, ind, C, C_max, colors, pruned, mc, adj); + } + } + } + P = T; + } + pruned[u] = 1; + for (long long j = vs[u]; j < vs[u + 1]; j++) { + adj[u][es[j]] = false; + adj[es[j]][u] = false; + } + + // dynamically reduce graph in a thread-safe manner + if ((get_time() - induce_time[omp_get_thread_num()]) > wait_time) { + G.reduce_graph( vs, es, pruned, G, i+lb_idx, mc); + G.graph_stats(G, mc, i+lb_idx, sec); + induce_time[omp_get_thread_num()] = get_time(); + } + } + } + } + + if (pruned) delete[] pruned; + + sol.resize(mc); + for (int i = 0; i < C_max.size(); i++) sol[i] = C_max[i]; + G.print_break(); + return sol.size(); +} + + + + +void pmcx_maxclique::branch_dense( + vector& vs, + vector& es, + vector &P, + vector& ind, + vector& C, + vector& C_max, + vector< vector >& colors, + int* &pruned, + int& mc, + bool** &adj) { + + // stop early if ub is reached + if (not_reached_ub) { + while (P.size() > 0) { + // terminating condition + if (C.size() + P.back().get_bound() > mc) { + int v = P.back().get_id(); C.push_back(v); + vector R; R.reserve(P.size()); + + for (int k = 0; k < P.size() - 1; k++) + // indicates neighbor AND pruned, since threads dynamically update it + if (adj[v][P[k].get_id()]) + if ((*bound)[P[k].get_id()] > mc) + R.push_back(P[k]); + + + if (R.size() > 0) { + // color graph induced by R and sort for O(1) + neigh_coloring_dense(vs, es, R, ind, C, C_max, colors, mc, adj); + branch_dense(vs, es, R, ind, C, C_max, colors, pruned, mc, adj); + } + else if (C.size() > mc) { + // obtain lock + #pragma omp critical (update_mc) + if (C.size() > mc) { + // ensure updated max is flushed + mc = C.size(); + 
C_max = C; + print_mc_info(C,sec); + if (mc >= param_ub) { + not_reached_ub = false; + cout << "[pmc: upper bound reached] omega = " << mc < +#include +#include +#include +#include "pmc_headers.h" +#include "pmc_utils.h" +#include "pmc_graph.h" +#include "pmc_input.h" +#include "pmc_vertex.h" +#include "pmc_neigh_cores.h" +#include "pmc_neigh_coloring.h" +#include + +using namespace std; + +namespace pmc { + + class pmcx_maxclique { + public: + vector* edges; + vector* vertices; + vector* bound; + vector* order; + vector* degree; + int param_ub; + int ub; + int lb; + double time_limit; + double sec; + double wait_time; + bool not_reached_ub; + bool time_expired_msg; + bool decr_order; + + string vertex_ordering; + int edge_ordering; + int style_bounds; + int style_dynamic_bounds; + + int num_threads; + + void initialize() { + vertex_ordering = "deg"; + edge_ordering = 0; + style_bounds = 0; + style_dynamic_bounds = 0; + not_reached_ub = true; + time_expired_msg = true; + decr_order = false; + } + + void setup_bounds(input& params) { + lb = params.lb; + ub = params.ub; + param_ub = params.param_ub; + if (param_ub == 0) + param_ub = ub; + time_limit = params.time_limit; + wait_time = params.remove_time; + sec = get_time(); + + num_threads = params.threads; + } + + pmcx_maxclique(pmc_graph& G, input& params) { + bound = G.get_kcores(); + order = G.get_kcore_ordering(); + setup_bounds(params); + initialize(); + vertex_ordering = params.vertex_search_order; + decr_order = params.decreasing_order; + } + + ~pmcx_maxclique() {}; + + int search(pmc_graph& G, vector& sol); + void branch( + vector& vs, + vector& es, + vector &P, + vector& ind, + vector& C, + vector& C_max, + vector< vector >& colors, + int* &pruned, + int& mc); + + int search_dense(pmc_graph& G, vector& sol); + void branch_dense( + vector& vs, + vector& es, + vector &P, + vector& ind, + vector& C, + vector& C_max, + vector< vector >& colors, + int* &pruned, + int& mc, + bool** &adj); + + }; +}; + +#endif diff 
--git a/pmcx_maxclique_basic.cpp b/pmcx_maxclique_basic.cpp new file mode 100755 index 0000000..33ba40c --- /dev/null +++ b/pmcx_maxclique_basic.cpp @@ -0,0 +1,322 @@ +/** + ============================================================================ + Name : Parallel Maximum Clique (PMC) Library + Author : Ryan A. Rossi (rrossi@purdue.edu) + Description : A general high-performance parallel framework for computing + maximum cliques. The library is designed to be fast for large + sparse graphs. + + Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved. + + Please cite the following paper if used: + Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa + Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs + and Temporal Strong Components, arXiv preprint 1302.6256, 2013. + + See http://ryanrossi.com/pmc for more information. + ============================================================================ + */ + +#include "pmcx_maxclique_basic.h" + +using namespace std; +using namespace pmc; + +int pmcx_maxclique_basic::search(pmc_graph& G, vector& sol) { + + vertices = G.get_vertices(); + edges = G.get_edges(); + degree = G.get_degree(); + int* pruned = new int[G.num_vertices()]; + memset(pruned, 0, G.num_vertices() * sizeof(int)); + int mc = lb, i = 0, u = 0; + + // initial pruning + int lb_idx = G.initial_pruning(G, pruned, lb); + + // set to worst case bound of cores/coloring + vector P, T; + P.reserve(G.get_max_degree()+1); + T.reserve(G.get_max_degree()+1); + + vector C, C_max; + C.reserve(G.get_max_degree()+1); + C_max.reserve(G.get_max_degree()+1); + + // init the neigh coloring array + vector< vector > colors(G.get_max_degree()+1); + for (int i = 0; i < G.get_max_degree()+1; i++) colors[i].reserve(G.get_max_degree()+1); + + // order verts for our search routine + vector V; + V.reserve(G.num_vertices()); + G.order_vertices(V,G,lb_idx,lb,vertex_ordering,decr_order); + cout << "|V| = " << V.size() < 
ind(G.num_vertices(),0); + vector es = G.get_edges_array(); + vector vs = G.get_vertices_array(); + + vector induce_time(num_threads,get_time()); + for (int t = 0; t < num_threads; ++t) induce_time[t] = induce_time[t] + t/4; + + #pragma omp parallel for schedule(dynamic) shared(pruned, G, T, V, mc, C_max, induce_time) \ + firstprivate(colors,ind,vs,es) private(u, P, C) + for (i = 0; i < (V.size()) - (mc-1); ++i) { + if (G.time_left(C_max,sec,time_limit,time_expired_msg)) { + + u = V[i].get_id(); + if ((*bound)[u] > mc) { + P.push_back(V[i]); + for (long long j = vs[u]; j < vs[u + 1]; ++j) + if (!pruned[es[j]]) + if ((*bound)[es[j]] > mc) + P.push_back(Vertex(es[j], (*degree)[es[j]])); + + if (P.size() > mc) { + neigh_coloring_bound(vs,es,P,ind,C,C_max,colors,pruned,mc); + if (P.back().get_bound() > mc) { + branch(vs,es,P, ind, C, C_max, colors, pruned, mc); + } + } + P = T; + } + pruned[u] = 1; + + // dynamically reduce graph in a thread-safe manner + if ((get_time() - induce_time[omp_get_thread_num()]) > wait_time) { + G.reduce_graph( vs, es, pruned, G, i+lb_idx, mc); + G.graph_stats(G, mc, i+lb_idx, sec); + induce_time[omp_get_thread_num()] = get_time(); + } + } + } + + if (pruned) delete[] pruned; + + sol.resize(mc); + for (int i = 0; i < C_max.size(); i++) sol[i] = C_max[i]; + G.print_break(); + return sol.size(); +} + + + + +void pmcx_maxclique_basic::branch( + vector& vs, + vector& es, + vector &P, + vector& ind, + vector& C, + vector& C_max, + vector< vector >& colors, + int* &pruned, + int& mc) { + + // stop early if ub is reached + if (not_reached_ub) { + while (P.size() > 0) { + // terminating condition + if (C.size() + P.back().get_bound() > mc) { + int v = P.back().get_id(); C.push_back(v); + + vector R; R.reserve(P.size()); + for (long long j = vs[v]; j < vs[v + 1]; j++) ind[es[j]] = 1; + + // intersection of N(v) and P - {v} + for (int k = 0; k < P.size() - 1; k++) + if (ind[P[k].get_id()]) + if (!pruned[P[k].get_id()]) + if ((*bound)[P[k].get_id()] > 
mc) + R.push_back(P[k]); + + for (long long j = vs[v]; j < vs[v + 1]; j++) ind[es[j]] = 0; + + + if (R.size() > 0) { + // color graph induced by R and sort for O(1) bound check + neigh_coloring_bound(vs, es, R, ind, C, C_max, colors, pruned, mc); + // search reordered R + branch(vs, es, R, ind, C, C_max, colors, pruned, mc); + } + else if (C.size() > mc) { + // obtain lock + #pragma omp critical (update_mc) + if (C.size() > mc) { + // ensure updated max is flushed + mc = C.size(); + C_max = C; + print_mc_info(C,sec); + if (mc >= param_ub) { + not_reached_ub = false; + cout << "[pmc: upper bound reached] omega = " << mc <& sol) { + + vertices = G.get_vertices(); + edges = G.get_edges(); + degree = G.get_degree(); + bool** adj = G.adj; + + int* pruned = new int[G.num_vertices()]; + memset(pruned, 0, G.num_vertices() * sizeof(int)); + int mc = lb, i = 0, u = 0; + + // initial pruning + int lb_idx = G.initial_pruning(G, pruned, lb, adj); + + // set to worst case bound of cores/coloring + vector P, T; + P.reserve(G.get_max_degree()+1); + T.reserve(G.get_max_degree()+1); + + vector C, C_max; + C.reserve(G.get_max_degree()+1); + C_max.reserve(G.get_max_degree()+1); + + // init the neigh coloring array + vector< vector > colors(G.get_max_degree()+1); + for (int i = 0; i < G.get_max_degree()+1; i++) colors[i].reserve(G.get_max_degree()+1); + + // order verts for our search routine + vector V; + V.reserve(G.num_vertices()); + G.order_vertices(V,G,lb_idx,lb,vertex_ordering,decr_order); + cout << "|V| = " << V.size() < ind(G.num_vertices(),0); + vector es = G.get_edges_array(); + vector vs = G.get_vertices_array(); + + vector induce_time(num_threads,get_time()); + for (int t = 0; t < num_threads; ++t) induce_time[t] = induce_time[t] + t/4; + + + #pragma omp parallel for schedule(dynamic) shared(pruned, G, adj, T, V, mc, C_max, induce_time) \ + firstprivate(colors,ind,vs,es) private(u, P, C) + for (i = 0; i < (V.size()) - (mc-1); ++i) { + if 
(G.time_left(C_max,sec,time_limit,time_expired_msg)) { + + u = V[i].get_id(); + if ((*bound)[u] > mc) { + P.push_back(V[i]); + for (long long j = vs[u]; j < vs[u + 1]; ++j) + if (!pruned[es[j]]) + if ((*bound)[es[j]] > mc) + P.push_back(Vertex(es[j], (*degree)[es[j]])); + + if (P.size() > mc) { + neigh_coloring_dense(vs,es,P,ind,C,C_max,colors,mc, adj); + if (P.back().get_bound() > mc) { + branch_dense(vs,es,P, ind, C, C_max, colors, pruned, mc, adj); + } + } + P = T; + } + pruned[u] = 1; + for (long long j = vs[u]; j < vs[u + 1]; j++) { + adj[u][es[j]] = false; + adj[es[j]][u] = false; + } + + // dynamically reduce graph in a thread-safe manner + if ((get_time() - induce_time[omp_get_thread_num()]) > wait_time) { + G.reduce_graph( vs, es, pruned, G, i+lb_idx, mc); + G.graph_stats(G, mc, i+lb_idx, sec); + induce_time[omp_get_thread_num()] = get_time(); + } + } + } + + if (pruned) delete[] pruned; + + sol.resize(mc); + for (int i = 0; i < C_max.size(); i++) sol[i] = C_max[i]; + G.print_break(); + return sol.size(); +} + + + + +void pmcx_maxclique_basic::branch_dense( + vector& vs, + vector& es, + vector &P, + vector& ind, + vector& C, + vector& C_max, + vector< vector >& colors, + int* &pruned, + int& mc, + bool** &adj) { + + // stop early if ub is reached + if (not_reached_ub) { + while (P.size() > 0) { + // terminating condition + if (C.size() + P.back().get_bound() > mc) { + int v = P.back().get_id(); C.push_back(v); + vector R; R.reserve(P.size()); + + for (int k = 0; k < P.size() - 1; k++) + // indicates neighbor AND pruned, since threads dynamically update it + if (adj[v][P[k].get_id()]) + if ((*bound)[P[k].get_id()] > mc) + R.push_back(P[k]); + + if (R.size() > 0) { + // color graph induced by R and sort for O(1) + neigh_coloring_dense(vs, es, R, ind, C, C_max, colors, mc, adj); + branch_dense(vs, es, R, ind, C, C_max, colors, pruned, mc, adj); + } + else if (C.size() > mc) { + // obtain lock + #pragma omp critical (update_mc) + if (C.size() > mc) { + // 
ensure updated max is flushed + mc = C.size(); + C_max = C; + print_mc_info(C,sec); + if (mc >= param_ub) { + not_reached_ub = false; + cout << "[pmc: upper bound reached] omega = " << mc < +#include +#include +#include +#include +#include "pmc_headers.h" +#include "pmc_utils.h" +#include "pmc_graph.h" +#include "pmc_input.h" +#include "pmc_vertex.h" +#include "pmc_neigh_cores.h" +#include "pmc_neigh_coloring.h" + +using namespace std; + +namespace pmc { + + class pmcx_maxclique_basic { + public: + vector* edges; + vector* vertices; + vector* bound; + vector* order; + vector* degree; + int param_ub; + int ub; + int lb; + double time_limit; + double sec; + double wait_time; + bool not_reached_ub; + bool time_expired_msg; + bool decr_order; + + string vertex_ordering; + int edge_ordering; + int style_bounds; + int style_dynamic_bounds; + + int num_threads; + + void initialize() { + vertex_ordering = "deg"; + edge_ordering = 0; + style_bounds = 0; + style_dynamic_bounds = 0; + not_reached_ub = true; + time_expired_msg = true; + decr_order = false; + } + + void setup_bounds(input& params) { + lb = params.lb; + ub = params.ub; + param_ub = params.param_ub; + if (param_ub == 0) + param_ub = ub; + time_limit = params.time_limit; + wait_time = params.remove_time; + sec = get_time(); + + num_threads = params.threads; + } + + + pmcx_maxclique_basic(pmc_graph& G, input& params) { + bound = G.get_kcores(); + order = G.get_kcore_ordering(); + setup_bounds(params); + initialize(); + vertex_ordering = params.vertex_search_order; + decr_order = params.decreasing_order; + } + + ~pmcx_maxclique_basic() {}; + + int search(pmc_graph& G, vector& sol); + + void branch( + vector& vs, + vector& es, + vector &P, + vector& ind, + vector& C, + vector& C_max, + vector< vector >& colors, + int* &pruned, + int& mc); + + + int search_dense(pmc_graph& G, vector& sol); + + void branch_dense( + vector& vs, + vector& es, + vector &P, + vector& ind, + vector& C, + vector& C_max, + vector< vector >& 
colors, + int* &pruned, + int& mc, + bool** &adj); + + }; +}; + +#endif