Skip to content

Commit

Permalink
moved structure-learning back to main branch
Browse files Browse the repository at this point in the history
  • Loading branch information
piantado committed Jun 29, 2016
1 parent 01257d3 commit 853ae2b
Show file tree
Hide file tree
Showing 56 changed files with 573 additions and 2,872 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file modified gpumcmc
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
31 changes: 16 additions & 15 deletions run-all.sh
@@ -1,10 +1,9 @@

ITERATIONS=100 #200 #50000 # K20: 10000 #200000 ## About 1min for each 1k, for Zipf
OUTER_BLOCKS=100 #100 #100 outer / 100 burn works really well!
N=50000
ITERATIONS=250 #50000 # K20: 10000 #200000 ## About 1min for each 1k, for Zipf
OUTER_BLOCKS=200 #20
N=100000 #50000 # 131072 #131072 # 131072 # 128*1024
OUTROOT=run/
BURN=100 #100
ENUMERATION=9
BURN=0

# The executable
EXEC=./gpumcmc
Expand All @@ -15,16 +14,14 @@ shopt -s globstar
OUTROOT=./run

WHICHHALF=all

# for DATA in $(ls -d data-sources/Regression/1); do

# DATA=data-sources/Regression/-10_20
# for DATA in $(ls -d data-sources/Regression/*); do
# DATA=data-sources/Science/COBE
# DATA=data-sources/Stats/Logistic
# DATA=data-sources/NIST/MGH09
# for WHICHHALF in 'first-half' 'even-half' 'all' ; do
for DATA in data-sources/Science/COBE data-sources/Science/Hubble data-sources/Science/Galileo ; do
# for DATA in $(ls -d data-sources/NIST/*) ; do
# DATA=data-sources/Science/BalmerSeries
for WHICHHALF in 'first-half' 'even-half' 'all' ; do
for DATA in $(ls -d data-sources/Science/*) ; do
# for DATA in $(ls -d data-sources/Science/*) $(ls -d data-sources/Stats/*) $(ls -d data-sources/NIST/*) ; do
# for DATA in $(ls -d data-sources/Science/*) $(ls -d data-sources/Stats/*) $(ls -d data-sources/NIST/*) ; do

echo Running $DATA
Expand All @@ -39,9 +36,13 @@ for DATA in data-sources/Science/COBE data-sources/Science/Hubble data-sources/S

# Run the CUDA MCMC under GNU time (/usr/bin/time), which is needed so that --output can write the timing to a file.
# We also wrap it in the shell's own `time` so the timing is printed on the command line too.
time /usr/bin/time --output=$OUT/time.txt $EXEC --burn=$BURN --enumeration=$ENUMERATION --iterations=$ITERATIONS --in=$DATA/data.txt --outer=$OUTER_BLOCKS --burn=$BURN_BLOCKS --N=$N --out=$OUT --$WHICHHALF
time /usr/bin/time --output=$OUT/time.txt $EXEC --burn=$BURN --iterations=$ITERATIONS --in=$DATA/data.txt --outer=$OUTER_BLOCKS --burn=$BURN_BLOCKS --N=$N --out=$OUT --$WHICHHALF

sort -g -k 3 --parallel=4 $OUT/samples.txt | tail -n 10000 > $OUT/tops.txt &
sort -g -k 5 --parallel=4 $OUT/samples.txt | tail -n 10000 > $OUT/tops.txt &

# And a python post-processing script to do plotting.
# Run in the background so we can move to the next plot
nice -n 19 python postprocess.py --directory=$OUT &

done
done
# done
11 changes: 5 additions & 6 deletions src/__PRIMITIVES.cu
@@ -1,18 +1,17 @@
/* GENERATED BY make_primitives_header.py in Makefile. DO NOT EDIT */


const int NUM_OPS=12;
const int NUM_OPS=13;



enum OPS { NOOP_=0,X_=1,ZERO_=2,ONE_=3,EXP_=4,LOG_=5,NEG_=6,MUL_=7,POW_=8,ADD_=9,DIV_=10,SUB_=11};
enum OPS { NOOP_=0,CONSTANT_=1,X_=2,ZERO_=3,ONE_=4,EXP_=5,LOG_=6,NEG_=7,MUL_=8,POW_=9,ADD_=10,DIV_=11,SUB_=12};



__device__ const int NARGS[] = {0, 0,0,0,1,1,1,2,2,2,2,2 };
const int hNARGS[] = {0, 0,0,0,1,1,1,2,2,2,2,2 };
const char* NAMES[] = { "<NA>", "x","zero","one","exp","log","neg","mul","pow","add","div","sub" };
__device__ const int NARGS[] = {0, 0,0,0,0,1,1,1,2,2,2,2,2 };
const int hNARGS[] = {0, 0,0,0,0,1,1,1,2,2,2,2,2 };
const char* NAMES[] = { "<NA>", "constant","x","zero","one","exp","log","neg","mul","pow","add","div","sub" };

// Non-defined primitives, used potentially by print_program_as_expression

#define CONSTANT_ -991
1 change: 1 addition & 0 deletions src/__VM_INCLUDE.cu
@@ -1,6 +1,7 @@
/* GENERATED BY make_primitives_header.py in Makefile. DO NOT EDIT */


case CONSTANT_: stack[newtop] = (constant_i<MAX_CONSTANTS) * h->constants[constant_i]; constant_i += (constant_i<MAX_CONSTANTS); break;
case X_: stack[newtop] = X; break;
case ZERO_: stack[newtop] = 0.0f; break;
case ONE_: stack[newtop] = 1.0f; break;
Expand Down
78 changes: 0 additions & 78 deletions src/constant-kernel.cu

This file was deleted.

File renamed without changes.
36 changes: 11 additions & 25 deletions src/data.cu
Expand Up @@ -22,45 +22,31 @@ typedef struct datum {
data_t sd; // stdev of the output|input.
} datum;

// the range of data, including the SDs, NOT counting any exclusions for even/odd, etc.
data_t MIN_X, MAX_X, MIN_Y, MAX_Y;


// Load data from a file, putting it into our structs.
// This allows us to trim our data if we want
vector<datum>* load_data_file(const char* datapath, int FIRST_HALF_DATA, int EVEN_HALF_DATA) {

MIN_X = INFINITY;
MIN_Y = INFINITY;
MAX_X = -INFINITY;
MAX_Y = -INFINITY;

FILE* fp = fopen(datapath, "r");
if(fp==NULL) { cerr << "*** ERROR: Cannot open file:\t" << datapath <<"\n"; exit(1);}

vector<datum>* d = new vector<datum>();
char* line = NULL; size_t len=0;
char* line = NULL; size_t len=0; float x,y,sd;
while( getline(&line, &len, fp) != -1) {

if( line[0] == '#' || strspn(line, " \r\n\t") == strlen(line)) continue; // skip comments and whitespace

float x,y,sd; // these are set by sscanf
if (sscanf(line, "%f\t%f\t%f\n", &x, &y, &sd) == 3) { } // floats
else if (sscanf(line, "%e\t%e\t%e\n", &x, &y, &sd) == 3) { } // scientific notation
if( line[0] == '#' ) continue; // skip comments
else if (sscanf(line, "%f\t%f\t%f\n", &x, &y, &sd) == 3) { // floats
d->push_back( (datum){.input=(data_t)x, .output=(data_t)y, .sd=(data_t)sd} );
}
else if (sscanf(line, "%e\t%e\t%e\n", &x, &y, &sd) == 3) { // scientific notation
d->push_back( (datum){.input=(data_t)x, .output=(data_t)y, .sd=(data_t)sd} );
}
else if ( strspn(line, " \r\n\t") == strlen(line) ) { // skip whitespace
continue;
}
else {
cerr << "*** ERROR IN PARSING INPUT\t" << line << endl;
exit(1);
}

// add these x,y,sd
d->push_back( (datum){.input=(data_t)x, .output=(data_t)y, .sd=(data_t)sd} );

// our min/max includes the SDs
if(x > MAX_X) MAX_X = x;
if(x < MIN_X) MIN_X = x;

if(y+sd > MAX_Y) MAX_Y = y+sd;
if(y-sd < MIN_Y) MIN_Y = y-sd;
}
fclose(fp);

Expand Down
File renamed without changes.

0 comments on commit 853ae2b

Please sign in to comment.