Skip to content

Commit

Permalink
Merge pull request #20 from karlstratos/master
Browse files Browse the repository at this point in the history
Optionally preserve the full hierarchy without pruning.
  • Loading branch information
percyliang committed Oct 20, 2017
2 parents 531636d + 5f8eaa7 commit 123bff3
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions wcluster.cc
Expand Up @@ -63,6 +63,7 @@ opt_define_int(num_threads, "threads", 1, "Number of threads to
opt_define_bool(chk, "chk", false, "Check data structures are valid (expensive).");
opt_define_bool(print_stats, "stats", false, "Just print out stats.");
opt_define_bool(paths2map, "paths2map", false, "Take the paths file and generate a map file.");
opt_define_bool(no_prune, "no_prune", false, "Do not prune the hierarchy (show all N leaf clusters)");

#define use_restrict (!restrict_file.empty())
const char *delim_str = "$#$";
Expand Down Expand Up @@ -105,7 +106,7 @@ DoubleVecVec q2; // slots s, t (containing clusters a, b) -> contribution to
DoubleVecVec L2; // slots s, t (containing clusters a, b) -> loss of mutual information if merge a and b

int curr_cluster_id; // ID to assign to a new cluster
int stage2_cluster_offset; // start of the IDs of clusters created in stage 2
int stage2_cluster_offset; // start of the IDs of clusters created in stage 2: set to 0 if no pruning.

double curr_minfo; // Mutual info, should be sum of all q2's

Expand Down Expand Up @@ -1057,7 +1058,7 @@ void do_clustering() {

compute_cluster_distribs();

stage2_cluster_offset = curr_cluster_id;
stage2_cluster_offset = (no_prune) ? 0 : curr_cluster_id;

// Stage 2: Merge the initC clusters in a hierarchical manner.
// O(C^3) time.
Expand Down Expand Up @@ -1094,7 +1095,7 @@ struct StackItem {

// The cluster tree is composed of the top part, which consists
// of Stage 2 merges, and the bottom part, which consists of stage 1 merges.
// Print out paths from the root only through the stage 2 merges.
// Print out paths from the root only through the stage 2 merges (print all if no pruning).
void output_cluster_paths() {
char path[16384];
vector<StackItem> stack;
Expand Down Expand Up @@ -1136,7 +1137,7 @@ void output_cluster_paths() {
}
else {
const IntPair &children = it->second;
// Only print out paths through the part of the tree constructed in stage 2.
// Only print out paths through the part of the tree constructed in stage 2 (print all if no pruning).
bool extend = a >= stage2_cluster_offset;
int new_path_i = path_i + extend;

Expand Down

0 comments on commit 123bff3

Please sign in to comment.