Skip to content

Commit

Permalink
Initial checkin for spectral clustering and corresponding packages
Browse files Browse the repository at this point in the history
  • Loading branch information
baihongjie committed Nov 5, 2009
0 parents commit 87c9e19
Show file tree
Hide file tree
Showing 588 changed files with 169,094 additions and 0 deletions.
43 changes: 43 additions & 0 deletions Makefile
@@ -0,0 +1,43 @@
# Build rules for the spectral clustering tools.
#
# CC is the compiler driver used for every compile and link step below.
# C is defined but not referenced by any rule in this Makefile.
CC=mpicxx
C=gcc
CFLAGS=-O3 -Wall -Wno-sign-compare

# These targets name actions rather than files. Declaring them phony keeps
# them working even if a file or directory with the same name appears.
.PHONY: all clean parpacklib f2clib

all: compute_distance distance_to_similarity evd kmeans

clean:
	rm -f *.o parpack/*.o parpack/*.a f2c/*.o f2c/*.a
	rm -f compute_distance distance_to_similarity evd kmeans


# Shared helpers linked into every tool.
common.o: common.cc common.h
	$(CC) -c $(CFLAGS) common.cc -o common.o

compute_distance.o: compute_distance.cc compute_distance.h
	$(CC) -c $(CFLAGS) compute_distance.cc -o compute_distance.o

compute_distance: compute_distance.o common.o
	$(CC) $(CFLAGS) common.o compute_distance.o -o compute_distance

distance_to_similarity.o: distance_to_similarity.cc distance_to_similarity.h
	$(CC) -c $(CFLAGS) distance_to_similarity.cc -o distance_to_similarity.o

distance_to_similarity: distance_to_similarity.o common.o
	$(CC) $(CFLAGS) common.o distance_to_similarity.o -o distance_to_similarity

# The two bundled libraries are built by their own Makefiles.
parpacklib:
	( cd parpack; $(MAKE))

f2clib:
	( cd f2c; $(MAKE))

evd.o: evd.cc evd.h
	$(CC) -c $(CFLAGS) evd.cc -o evd.o

# evd links against the libraries produced in parpack/ and f2c/.
evd: evd.o common.o parpacklib f2clib
	$(CC) $(CFLAGS) common.o evd.o -Lparpack/ -Lf2c/ -lparpack -lf2c -o evd

kmeans.o: kmeans.cc kmeans.h
	$(CC) -c $(CFLAGS) kmeans.cc -o kmeans.o

kmeans: kmeans.o common.o
	$(CC) $(CFLAGS) common.o kmeans.o -o kmeans
94 changes: 94 additions & 0 deletions common.cc
@@ -0,0 +1,94 @@
// Copyright 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "common.h"

// Target of the deliberate one-byte write performed by the CHECK_* macros and
// by Logger's destructor for FATAL logs (see common.h).
// NOTE(review): this is a writable char array, so storing '\0' into its first
// element does not by itself raise a fault -- confirm whether a pointer to
// read-only storage was intended.
char kSegmentFaultCauser[] = "Used to cause artificial segmentation fault";

namespace learning_psc {

// Draws an index at random, where index i is chosen with probability
// proportional to distribution[i]. The weights need not sum to one.
//
// Returns the first index whose running sum of weights reaches a threshold
// drawn as RandDouble() * (sum of all weights). If no index qualifies (for
// example, the distribution is empty), logs at FATAL severity and returns -1.
int GetAccumulativeSample(const vector<double>& distribution) {
  typedef vector<double>::const_iterator WeightIter;

  // First pass: total weight.
  double total_weight = 0.0;
  for (WeightIter it = distribution.begin(); it != distribution.end(); ++it) {
    total_weight += *it;
  }

  // Second pass: walk the cumulative sum until it reaches the threshold.
  const double threshold = RandDouble() * total_weight;
  double running_weight = 0.0;
  int index = 0;
  for (WeightIter it = distribution.begin(); it != distribution.end();
       ++it, ++index) {
    running_weight += *it;
    if (running_weight >= threshold) {
      return index;
    }
  }

  LOG(FATAL) << "Failed to choose element from distribution of size "
             << distribution.size() << " and sum " << total_weight;

  return -1;
}

// Writes every element of v to out, each followed by a single space
// (including the last one). Returns out so calls can be chained.
std::ostream& operator << (std::ostream& out, vector<double>& v) {
  for (vector<double>::const_iterator it = v.begin(); it != v.end(); ++it) {
    out << *it << " ";
  }
  return out;
}

// Appends the sizeof(int)-byte binary form of i32 to *key, most significant
// byte first. Existing contents of *key are preserved.
void IntToString(int i32, string* key) {
  // Work on an unsigned copy: right-shifting a negative signed value is
  // implementation-defined before C++20, while unsigned shifts are not.
  unsigned int bits = static_cast<unsigned int>(i32);
  // Use the string's own size type so long keys are not truncated to int.
  const string::size_type base = key->size();
  key->resize(base + sizeof(bits));
  // Fill from the last byte backwards, peeling off the low byte each time.
  for (string::size_type pos = sizeof(bits); pos-- > 0; ) {
    (*key)[base + pos] = static_cast<char>(bits & 0xffu);
    bits >>= 8;
  }
}
// Decodes an int from the bytes at key, most significant byte first (the
// layout written by IntToString). size is checked against sizeof(int32);
// sizeof(int) bytes are read from key.
int StringToInt(const char* key, int size) {
  CHECK(size == sizeof(int32));
  // Accumulate in an unsigned value: shifting a bit into the sign position of
  // a signed int is not well-defined in older C++ standards.
  unsigned int bits = 0;
  for (size_t pos = 0; pos < sizeof(bits); ++pos) {
    bits = (bits << 8) | static_cast<unsigned char>(key[pos]);
  }
  return static_cast<int>(bits);
}
// Appends the sizeof(int64)-byte binary form of i64 to *key, most significant
// byte first. Existing contents of *key are preserved.
void Int64ToString(int64 i64, string* key) {
  // Work on an unsigned copy: right-shifting a negative signed value is
  // implementation-defined before C++20, while unsigned shifts are not.
  unsigned long long bits = static_cast<unsigned long long>(i64);
  // Use the string's own size type so long keys are not truncated to int.
  const string::size_type base = key->size();
  key->resize(base + sizeof(i64));
  // Fill from the last byte backwards, peeling off the low byte each time.
  for (string::size_type pos = sizeof(i64); pos-- > 0; ) {
    (*key)[base + pos] = static_cast<char>(bits & 0xffu);
    bits >>= 8;
  }
}
// Decodes an int64 from the bytes at key, most significant byte first (the
// layout written by Int64ToString). size is checked against sizeof(int64);
// sizeof(int64) bytes are read from key.
int64 StringToInt64(const char* key, int size) {
  CHECK(size == sizeof(int64));
  // Accumulate in an unsigned value: shifting a bit into the sign position of
  // a signed integer is not well-defined in older C++ standards.
  unsigned long long bits = 0;
  for (size_t pos = 0; pos < sizeof(int64); ++pos) {
    bits = (bits << 8) | static_cast<unsigned char>(key[pos]);
  }
  return static_cast<int64>(bits);
}
void DoubleToString(double d, string* key) {
int64 l;
memcpy(&l, &d, sizeof(d));
Int64ToString(l, key);
}
double StringToDouble(const char* key, int size) {
int64 l = StringToInt64(key, size);
double d;
memcpy(&d, &l, sizeof(d));
return d;
}
} // namespace learning_psc
221 changes: 221 additions & 0 deletions common.h
@@ -0,0 +1,221 @@
// Copyright 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef _OPENSOURCE_PSC_COMMON_H__
#define _OPENSOURCE_PSC_COMMON_H__

#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include <mpi.h>


// The CHECK_xxxx facilities. When a check fails, they print the source
// location, the checked expression(s) and their value(s) to std::cerr and
// then terminate the process with abort(). If the program is run within a
// debugger, the abort stops the debugger with the stack intact, which
// provides the context of the failure.
//
// Each macro expands to a single do { ... } while (0) statement, so it can
// be used safely as the body of an unbraced if/else.
//
// NOTE: on failure the operands are evaluated a second time in order to
// print them, so avoid operands with side effects.
//
// Kept declared for any code that references it.
extern char kSegmentFaultCauser[];

#define CHECK(a) do { \
  if (!(a)) { \
    std::cerr << "CHECK failed " \
              << __FILE__ << ":" << __LINE__ << "\n" \
              << #a << " = " << (a) << "\n"; \
    ::abort(); \
  } \
} while (0)

#define CHECK_EQ(a, b) do { \
  if (!((a) == (b))) { \
    std::cerr << "CHECK_EQ failed " \
              << __FILE__ << ":" << __LINE__ << "\n" \
              << #a << " = " << (a) << "\n" \
              << #b << " = " << (b) << "\n"; \
    ::abort(); \
  } \
} while (0)

#define CHECK_GT(a, b) do { \
  if (!((a) > (b))) { \
    std::cerr << "CHECK_GT failed " \
              << __FILE__ << ":" << __LINE__ << "\n" \
              << #a << " = " << (a) << "\n" \
              << #b << " = " << (b) << "\n"; \
    ::abort(); \
  } \
} while (0)

#define CHECK_LT(a, b) do { \
  if (!((a) < (b))) { \
    std::cerr << "CHECK_LT failed " \
              << __FILE__ << ":" << __LINE__ << "\n" \
              << #a << " = " << (a) << "\n" \
              << #b << " = " << (b) << "\n"; \
    ::abort(); \
  } \
} while (0)

#define CHECK_GE(a, b) do { \
  if (!((a) >= (b))) { \
    std::cerr << "CHECK_GE failed " \
              << __FILE__ << ":" << __LINE__ << "\n" \
              << #a << " = " << (a) << "\n" \
              << #b << " = " << (b) << "\n"; \
    ::abort(); \
  } \
} while (0)

#define CHECK_LE(a, b) do { \
  if (!((a) <= (b))) { \
    std::cerr << "CHECK_LE failed " \
              << __FILE__ << ":" << __LINE__ << "\n" \
              << #a << " = " << (a) << "\n" \
              << #b << " = " << (b) << "\n"; \
    ::abort(); \
  } \
} while (0)


// The log facility, which makes it easy to leave a trace of your
// program. The logs are classified according to their severity
// levels. A log of the level FATAL terminates the process with abort()
// once its message has been written, which lets a debugger stop with the
// stack intact.
//
// Examples:
// LOG(INFO) << iteration << "-th iteration ...";
// LOG(FATAL) << "Probability value < 0 " << prob_value;
//
enum LogSeverity { INFO, WARNING, ERROR, FATAL };

// One Logger is created per LOG() statement. stream() writes the
// "<file> (<line>) : " prefix to std::cerr and returns std::cerr so that the
// caller can append the message. No trailing newline is added.
class Logger {
 public:
  Logger(LogSeverity ls, const std::string& file, int line)
      : ls_(ls), file_(file), line_(line) {}

  std::ostream& stream() const {
    return std::cerr << file_ << " (" << line_ << ") : ";
  }

  // LOG() creates a temporary, so this runs at the end of the full logging
  // expression, i.e. after the whole message has been streamed.
  ~Logger() {
    if (ls_ == FATAL) {
      std::cerr.flush();
      ::abort();
    }
  }

 private:
  LogSeverity ls_;
  std::string file_;
  int line_;
};

#define LOG(ls) Logger(ls, __FILE__, __LINE__).stream()

// Basic POD types.
// int32 is an alias for int. NOTE(review): this assumes int is 32 bits wide
// on the target platform; nothing here enforces it.
typedef int int32;
// int64 uses the compiler-specific 64-bit type when COMPILER_MSVC is defined
// and long long otherwise.
#ifdef COMPILER_MSVC
typedef __int64 int64;
#else
typedef long long int64;
#endif

// Frequently-used standard library names (containers, streams, pair and
// sqrt), imported into the global namespace so they can be written
// unqualified throughout the project.
// std::string is listed twice below; the repetition is harmless.
using std::list;
using std::map;
using std::string;
using std::vector;
using std::string;
using std::istringstream;
using std::ifstream;
using std::pair;
using std::sqrt;

namespace learning_psc {

// A plain (index, value) pair.
struct IndexValue {
  int index;
  double value;
  // Zero-initialize so a default-constructed object never holds indeterminate
  // values.
  IndexValue() : index(0), value(0.0) {}
  IndexValue(int i, double v) : index(i), value(v) {}
};
// Generate a random floating-point value in the range of [0,1) from the
// uniform distribution, using rand().
inline double RandDouble() {
  // Divide by RAND_MAX + 1 (computed in double to avoid integer overflow) so
  // that rand() == RAND_MAX maps to a value strictly below 1.
  return rand() / (static_cast<double>(RAND_MAX) + 1.0);
}

// Generate a random integer value in the range of [0,bound) from the
// uniform distribution.
inline int RandInt(int bound) {
  // NOTE: Do NOT use rand() % bound, which does not approximate a
  // discrete uniform distribution well.
  const int pick = static_cast<int>(RandDouble() * bound);
  // Defensive clamp: keeps the result below bound even if RandDouble() ever
  // returns exactly 1.0.
  if (bound > 0 && pick >= bound) {
    return bound - 1;
  }
  return pick;
}

// Streaming output facilities for GSL matrix, GSL vector and STL
// vector.
std::ostream& operator << (std::ostream& out, vector<double>& v);


// Convert an int/int64/double to its binary string form (4/8/8 bytes, most
// significant byte first) and append it to *key.
// The string form is not human-readable, but it can be converted back to the
// original int/int64/double without losing any information.
void IntToString(int i32, string* key);
int StringToInt(const char* key, int size);
void Int64ToString(int64 i64, string* key);
int64 StringToInt64(const char* key, int size);

void DoubleToString(double d, string* key);
double StringToDouble(const char* key, int size);

// A bounded heap that retains only the best n of all inserted elements,
// where cmp(a, b) == true means that a ranks ahead of b. With a
// "greater than" comparator it therefore keeps the n largest elements.
//
// Example:
// struct Cmp {bool operator()(double a, double b) {return a > b;}};
// TopN<double, Cmp> n(2); n.Insert(5); n.Insert(3); n.Insert(2);
// vector<double> v; n.Extract(&v);
// CHECK_EQ(3, v[0]);
// CHECK_EQ(5, v[1]);
template <class T, class Cmp>
class TopN {
 public:
  // n is the maximum number of elements to retain. If n <= 0, nothing is
  // ever retained.
  TopN(int n);
  // Offers an element; it is kept only if fewer than n elements are held or
  // it ranks strictly ahead of the worst element currently held.
  void Insert(const T& element);
  // Appends the retained elements to *result, worst-ranked first, and leaves
  // this object empty.
  void Extract(vector<T>* result);
 private:
  int n_;
  // Heap ordered by cmp_; front() is the worst-ranked retained element.
  vector<T> elements_;
  Cmp cmp_;
};

template <class T, class Cmp>
TopN<T, Cmp>::TopN(int n) : n_(n), cmp_(Cmp()) {
}

template <class T, class Cmp>
void TopN<T, Cmp>::Insert(const T& element) {
  // With no capacity there is nothing to keep. This also avoids calling
  // front() on an empty vector and comparing a negative n_ as unsigned.
  if (n_ <= 0) {
    return;
  }
  typedef typename vector<T>::size_type SizeType;
  const SizeType limit = static_cast<SizeType>(n_);
  if (elements_.size() >= limit && !cmp_(element, elements_.front())) {
    return;
  }
  elements_.push_back(element);
  std::push_heap(elements_.begin(), elements_.end(), cmp_);
  if (elements_.size() > limit) {
    // Evict the worst-ranked element to get back to n_ entries.
    std::pop_heap(elements_.begin(), elements_.end(), cmp_);
    elements_.pop_back();
  }
}

template <class T, class Cmp>
void TopN<T, Cmp>::Extract(vector<T>* result) {
  while (!elements_.empty()) {
    // pop_heap moves the current front (worst-ranked) to the back.
    std::pop_heap(elements_.begin(), elements_.end(), cmp_);
    result->push_back(elements_.back());
    elements_.pop_back();
  }
}

} // namespace learning_psc

#endif // _OPENSOURCE_PSC_COMMON_H__

0 comments on commit 87c9e19

Please sign in to comment.