Skip to content

Commit

Permalink
Add target 'cuda' to Makefile
Browse files Browse the repository at this point in the history
The nvcc linker is stupid and won't recognize functions in other files linked
with gcc or other files linked with nvcc that end in .c.  So, .cu versions of
all relevant files for the CUDA version have been copied from the dependencies
in the sequential version. Let's hope we can fix this hack later.

Note that all the CUDA source files have unique prefix names so that the object
names don't overlap with those of the sequential version. This way, we don't
break 'make seq'.
  • Loading branch information
serban committed Nov 27, 2010
1 parent 3266c70 commit 3289f8a
Show file tree
Hide file tree
Showing 5 changed files with 539 additions and 3 deletions.
26 changes: 23 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# vim:set ts=8 sw=8 sts=0 noet:

# * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# File: Makefile */
# Description: Makefile for programs running a simple k-means clustering */
Expand All @@ -12,13 +14,14 @@

.KEEP_STATE:

all: seq omp mpi
all: seq omp cuda mpi

DFLAGS =
OPTFLAGS = -O -NDEBUG
OPTFLAGS = -g
OPTFLAGS = -g -pg
INCFLAGS = -I.
CFLAGS = $(OPTFLAGS) $(DFLAGS) $(INCFLAGS)
NVCCFLAGS = $(CFLAGS) --ptxas-options=-v
LDFLAGS = $(OPTFLAGS)
LIBS =

Expand All @@ -30,6 +33,7 @@ OMPFLAGS = -fopenmp

CC = gcc
MPICC = mpicc
NVCC = nvcc

.c.o:
$(CC) $(CFLAGS) -c $<
Expand Down Expand Up @@ -84,9 +88,25 @@ seq: seq_main
seq_main: $(SEQ_OBJ) $(H_FILES)
$(CC) $(LDFLAGS) -o seq_main $(SEQ_OBJ) $(LIBS)

# ------------------------------------------------------------------------------
# CUDA Version

# Compile every CUDA source (*.cu) into an object file with nvcc.
%.o: %.cu
	$(NVCC) $(NVCCFLAGS) -o $@ -c $<

# The sources are split into two lists; judging by the names, CUDA_C_SRC is
# C code carried over into .cu files and CUDA_CU_SRC is the k-means source
# itself (NOTE(review): confirm the intended split).
CUDA_C_SRC  = cuda_main.cu cuda_io.cu cuda_wtime.cu
CUDA_CU_SRC = cuda_kmeans.cu

CUDA_C_OBJ  = $(CUDA_C_SRC:%.cu=%.o)
CUDA_CU_OBJ = $(CUDA_CU_SRC:%.cu=%.o)

# cuda_io.cu and cuda_kmeans.cu both #include "kmeans.h", so their objects
# must be rebuilt whenever that header changes.
cuda_io.o cuda_kmeans.o: kmeans.h

# 'cuda' is a convenience alias, not a file: declare it phony so that a stray
# file named 'cuda' can never make the target look up to date.
.PHONY: cuda
cuda: cuda_main

# Link with nvcc as well; $(LIBS) is appended for consistency with the
# seq_main link rule.
cuda_main: $(CUDA_C_OBJ) $(CUDA_CU_OBJ)
	$(NVCC) $(LDFLAGS) -o $@ $(CUDA_C_OBJ) $(CUDA_CU_OBJ) $(LIBS)

#---------------------------------------------------------------------
clean:
rm -rf *.o omp_main seq_main mpi_main \
rm -rf *.o omp_main seq_main mpi_main cuda_main \
core* .make.state \
*.cluster_centres *.membership \
Image_data/*.cluster_centres \
Expand Down
183 changes: 183 additions & 0 deletions cuda_io.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* File: cuda_io.cu (copied from file_io.c) */
/* Description: This program reads point data from a file */
/* and writes cluster output to files */
/* Input file format: */
/* ascii file: each line contains 1 data object */
/* binary file: first 4-byte integer is the number of data */
/* objects and 2nd integer is the no. of features (or */
/* coordinates) of each object */
/* */
/* Author: Wei-keng Liao */
/* ECE Department Northwestern University */
/* email: wkliao@ece.northwestern.edu */
/* Copyright, 2005, Wei-keng Liao */
/* */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strtok() */
#include <sys/types.h> /* open() */
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h> /* read(), close() */

#include "kmeans.h"

#define MAX_CHAR_PER_LINE 128


/*---< file_read() >---------------------------------------------------------*/
/*
 * Read a set of data objects from `filename` into a newly allocated 2D array.
 *
 * Binary input: two leading ints (number of objects, number of coordinates)
 * followed by numObjs*numCoords floats. ASCII input: one object per line; the
 * first token on each line is treated as an id and skipped, the remaining
 * tokens are the coordinates.
 *
 * The returned array is laid out as one contiguous float buffer (objects[0])
 * plus an array of row pointers (objects), so the caller releases it with
 * free(objects[0]); free(objects);
 *
 * On success *numObjs and *numCoords are set and the array is returned.
 * NULL is returned (with a message on stderr) if the file cannot be opened,
 * if it describes no objects or no coordinates, or if an ASCII line holds
 * fewer coordinates than the first object line.
 */
float** file_read(int   isBinaryFile,  /* flag: 0 or 1 */
                  char *filename,      /* input file name */
                  int  *numObjs,       /* out: no. data objects (local) */
                  int  *numCoords)     /* out: no. coordinates */
{
    float   **objects;
    int       i, j, len;
    ssize_t   numBytesRead;

    if (isBinaryFile) {  /* input file is in raw binary format -------------*/
        int infile;

        /* O_RDONLY never creates a file, so open() takes no mode argument */
        if ((infile = open(filename, O_RDONLY)) == -1) {
            fprintf(stderr, "Error: no such file (%s)\n", filename);
            return NULL;
        }
        numBytesRead = read(infile, numObjs, sizeof(int));
        assert(numBytesRead == sizeof(int));
        numBytesRead = read(infile, numCoords, sizeof(int));
        assert(numBytesRead == sizeof(int));
        if (_debug) {
            printf("File %s numObjs = %d\n",filename,*numObjs);
            printf("File %s numCoords = %d\n",filename,*numCoords);
        }

        /* the header comes straight from the file: refuse sizes that would
           turn into a zero or negative allocation below */
        if (*numObjs <= 0 || *numCoords <= 0) {
            fprintf(stderr, "Error: invalid header in file (%s)\n", filename);
            close(infile);
            return NULL;
        }

        /* allocate space for objects[][] and read all objects */
        len = (*numObjs) * (*numCoords);
        objects = (float**)malloc((*numObjs) * sizeof(float*));
        assert(objects != NULL);
        objects[0] = (float*) malloc(len * sizeof(float));
        assert(objects[0] != NULL);
        for (i=1; i<(*numObjs); i++)
            objects[i] = objects[i-1] + (*numCoords);

        numBytesRead = read(infile, objects[0], len*sizeof(float));
        assert(numBytesRead == len*sizeof(float));

        close(infile);
    }
    else {  /* input file is in ASCII format -------------------------------*/
        FILE *infile;
        char *line, *ret, *token;
        int   lineLen;

        if ((infile = fopen(filename, "r")) == NULL) {
            fprintf(stderr, "Error: no such file (%s)\n", filename);
            return NULL;
        }

        /* first find the number of objects */
        lineLen = MAX_CHAR_PER_LINE;
        line = (char*) malloc(lineLen);
        assert(line != NULL);

        (*numObjs) = 0;
        while (fgets(line, lineLen, infile) != NULL) {
            /* check each line to find the max line length */
            while (strlen(line) == (size_t)(lineLen-1)) {
                /* this line read is not complete: step back to the start
                   of the line and read it again with a larger buffer */
                len = strlen(line);
                fseek(infile, -len, SEEK_CUR);

                /* increase lineLen */
                lineLen += MAX_CHAR_PER_LINE;
                line = (char*) realloc(line, lineLen);
                assert(line != NULL);

                ret = fgets(line, lineLen, infile);
                assert(ret != NULL);
            }

            /* only lines holding at least one token count as objects */
            if (strtok(line, " \t\n") != 0)
                (*numObjs)++;
        }
        rewind(infile);
        if (_debug) printf("lineLen = %d\n",lineLen);

        /* find the no. coordinates of each object */
        (*numCoords) = 0;
        while (fgets(line, lineLen, infile) != NULL) {
            if (strtok(line, " \t\n") != 0) {
                /* ignore the id (first coordinate): numCoords = 1; */
                while (strtok(NULL, " ,\t\n") != NULL) (*numCoords)++;
                break; /* this makes read from 1st object */
            }
        }
        rewind(infile);
        if (_debug) {
            printf("File %s numObjs = %d\n",filename,*numObjs);
            printf("File %s numCoords = %d\n",filename,*numCoords);
        }

        /* nothing usable in the file: bail out before allocating 0 bytes */
        if (*numObjs <= 0 || *numCoords <= 0) {
            fprintf(stderr, "Error: no data objects found in file (%s)\n",
                    filename);
            fclose(infile);
            free(line);
            return NULL;
        }

        /* allocate space for objects[][] and read all objects */
        len = (*numObjs) * (*numCoords);
        objects = (float**)malloc((*numObjs) * sizeof(float*));
        assert(objects != NULL);
        objects[0] = (float*) malloc(len * sizeof(float));
        assert(objects[0] != NULL);
        for (i=1; i<(*numObjs); i++)
            objects[i] = objects[i-1] + (*numCoords);

        i = 0;
        /* read all objects; the bound on i is purely defensive, the first
           pass already counted the non-empty lines */
        while (i < (*numObjs) && fgets(line, lineLen, infile) != NULL) {
            if (strtok(line, " \t\n") == NULL) continue;
            for (j=0; j<(*numCoords); j++) {
                token = strtok(NULL, " ,\t\n");
                if (token == NULL) {
                    /* fewer coordinates than the first object line:
                       atof(NULL) would crash, so report and give up */
                    fprintf(stderr,
                            "Error: object %d in file (%s) has fewer than %d coordinates\n",
                            i, filename, *numCoords);
                    free(objects[0]);
                    free(objects);
                    fclose(infile);
                    free(line);
                    return NULL;
                }
                objects[i][j] = atof(token);
            }
            i++;
        }

        fclose(infile);
        free(line);
    }

    return objects;
}

/*---< file_write() >---------------------------------------------------------*/
/*
 * Write the clustering result to two text files derived from `filename`:
 *   <filename>.cluster_centres : one line per cluster, "<id> <coord> ..."
 *   <filename>.membership      : one line per object,  "<id> <cluster id>"
 *
 * Returns 1 on success. Returns 0 (with a message on stderr) if an output
 * file name does not fit the internal buffer or an output file cannot be
 * opened for writing.
 */
int file_write(char   *filename,     /* base name of the output files */
               int     numClusters,  /* no. clusters */
               int     numObjs,      /* no. data objects */
               int     numCoords,    /* no. coordinates (local) */
               float **clusters,     /* [numClusters][numCoords] centers */
               int    *membership)   /* [numObjs] */
{
    FILE *fptr;
    int   i, j, n;
    char  outFileName[1024];

    /* output: the coordinates of the cluster centres ----------------------*/
    /* snprintf bounds the write; a result >= the buffer size means the name
       was truncated and must not be used */
    n = snprintf(outFileName, sizeof(outFileName), "%s.cluster_centres",
                 filename);
    if (n < 0 || (size_t)n >= sizeof(outFileName)) {
        fprintf(stderr, "Error: output file name too long (%s)\n", filename);
        return 0;
    }
    printf("Writing coordinates of K=%d cluster centers to file \"%s\"\n",
           numClusters, outFileName);
    fptr = fopen(outFileName, "w");
    if (fptr == NULL) {
        fprintf(stderr, "Error: cannot open file (%s) for writing\n",
                outFileName);
        return 0;
    }
    for (i=0; i<numClusters; i++) {
        fprintf(fptr, "%d ", i);
        for (j=0; j<numCoords; j++)
            fprintf(fptr, "%f ", clusters[i][j]);
        fprintf(fptr, "\n");
    }
    fclose(fptr);

    /* output: the closest cluster centre to each of the data points --------*/
    n = snprintf(outFileName, sizeof(outFileName), "%s.membership", filename);
    if (n < 0 || (size_t)n >= sizeof(outFileName)) {
        fprintf(stderr, "Error: output file name too long (%s)\n", filename);
        return 0;
    }
    printf("Writing membership of N=%d data objects to file \"%s\"\n",
           numObjs, outFileName);
    fptr = fopen(outFileName, "w");
    if (fptr == NULL) {
        fprintf(stderr, "Error: cannot open file (%s) for writing\n",
                outFileName);
        return 0;
    }
    for (i=0; i<numObjs; i++)
        fprintf(fptr, "%d %d\n", i, membership[i]);
    fclose(fptr);

    return 1;
}
149 changes: 149 additions & 0 deletions cuda_kmeans.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* File: cuda_kmeans.cu (copy of the sequential version, seq_kmeans.c) */
/* Description: Implementation of simple k-means clustering algorithm */
/* This program takes an array of N data objects, each with */
/* M coordinates and performs a k-means clustering given a */
/* user-provided value of the number of clusters (K). The */
/* clustering results are saved in 2 arrays: */
/* 1. a returned array of size [K][M] indicating the center */
/* coordinates of K clusters */
/* 2. membership[N] stores the cluster center ids, each */
/* corresponding to the cluster a data object is assigned to */
/* */
/* Author: Wei-keng Liao */
/* ECE Department, Northwestern University */
/* email: wkliao@ece.northwestern.edu */
/* Copyright, 2005, Wei-keng Liao */
/* */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <stdio.h>
#include <stdlib.h>

#include "kmeans.h"


/*----< euclid_dist_2() >----------------------------------------------------*/
/* square of Euclid distance between two multi-dimensional points */
/* Sum of squared per-coordinate differences between coord1 and coord2,
   i.e. the squared Euclidean distance (no square root is taken). */
__inline static
float euclid_dist_2(int    numdims,  /* no. dimensions */
                    float *coord1,   /* [numdims] */
                    float *coord2)   /* [numdims] */
{
    float sum = 0.0;
    int   d   = 0;

    while (d < numdims) {
        float diff = coord1[d] - coord2[d];

        sum += diff * diff;
        d++;
    }

    return sum;
}

/*----< find_nearest_cluster() >---------------------------------------------*/
/* Return the index of the cluster center with the smallest squared distance
   to `object`. Ties keep the lowest index, because a later center only wins
   when it is strictly closer. clusters[0] is read unconditionally. */
__inline static
int find_nearest_cluster(int     numClusters,  /* no. clusters */
                         int     numCoords,    /* no. coordinates */
                         float  *object,       /* [numCoords] */
                         float **clusters)     /* [numClusters][numCoords] */
{
    int   best = 0;
    int   k;
    float best_dist = euclid_dist_2(numCoords, object, clusters[0]);

    for (k = 1; k < numClusters; k++) {
        /* squared distances order the same way as true distances,
           so no square root is needed */
        float d = euclid_dist_2(numCoords, object, clusters[k]);

        if (!(d < best_dist))
            continue;  /* not strictly closer: keep the current best */

        best_dist = d;
        best      = k;
    }

    return best;
}

/*----< seq_kmeans() >-------------------------------------------------------*/
/* return an array of cluster centers of size [numClusters][numCoords] */
/*
 * Cluster `objects` into numClusters groups.
 *
 * The first numClusters objects seed the centers. Each pass assigns every
 * object to its nearest center, then replaces each non-empty cluster's center
 * with the mean of its members. Passes repeat while the fraction of objects
 * that changed cluster exceeds `threshold`, subject to an iteration cap.
 *
 * membership[] receives the cluster index of every object. The returned
 * [numClusters][numCoords] array is one contiguous buffer plus a row-pointer
 * array: free with free(clusters[0]); free(clusters);
 */
float** seq_kmeans(float **objects,      /* in: [numObjs][numCoords] */
                   int     numCoords,    /* no. features */
                   int     numObjs,      /* no. objects */
                   int     numClusters,  /* no. clusters */
                   float   threshold,    /* % objects change membership */
                   int    *membership)   /* out: [numObjs] */
{
    int     obj, k, d, nearest;
    int     iterations = 0;
    int    *counts;     /* [numClusters]: members collected this pass */
    float   changed;    /* objects that switched cluster, later a fraction */
    float  *row;
    float **clusters;   /* out: [numClusters][numCoords] */
    float **sums;       /* [numClusters][numCoords]: per-cluster sums */

    /* centers: a single buffer, with clusters[k] pointing at row k */
    clusters = (float**) malloc(numClusters * sizeof(float*));
    assert(clusters != NULL);
    clusters[0] = (float*) malloc(numClusters * numCoords * sizeof(float));
    assert(clusters[0] != NULL);
    row = clusters[0];
    for (k = 1; k < numClusters; k++) {
        row += numCoords;
        clusters[k] = row;
    }

    /* seed the centers with the first numClusters objects */
    for (k = 0; k < numClusters; k++) {
        for (d = 0; d < numCoords; d++) {
            clusters[k][d] = objects[k][d];
        }
    }

    /* -1 matches no cluster index, so every object counts as changed in
       the first pass */
    for (obj = 0; obj < numObjs; obj++) {
        membership[obj] = -1;
    }

    /* accumulators must start at zero, hence calloc */
    counts = (int*) calloc(numClusters, sizeof(int));
    assert(counts != NULL);

    sums = (float**) malloc(numClusters * sizeof(float*));
    assert(sums != NULL);
    sums[0] = (float*) calloc(numClusters * numCoords, sizeof(float));
    assert(sums[0] != NULL);
    row = sums[0];
    for (k = 1; k < numClusters; k++) {
        row += numCoords;
        sums[k] = row;
    }

    do {
        changed = 0.0;

        /* assignment step */
        for (obj = 0; obj < numObjs; obj++) {
            /* index of the nearest cluster center */
            nearest = find_nearest_cluster(numClusters, numCoords,
                                           objects[obj], clusters);

            /* record the assignment, counting it when it differs */
            if (membership[obj] != nearest) {
                changed += 1.0;
                membership[obj] = nearest;
            }

            /* add this object into the running sum of its cluster */
            counts[nearest]++;
            for (d = 0; d < numCoords; d++) {
                sums[nearest][d] += objects[obj][d];
            }
        }

        /* update step: an empty cluster keeps its previous center */
        for (k = 0; k < numClusters; k++) {
            if (counts[k] > 0) {
                for (d = 0; d < numCoords; d++) {
                    clusters[k][d] = sums[k][d] / counts[k];
                }
            }
            /* reset the accumulators for the next pass */
            for (d = 0; d < numCoords; d++) {
                sums[k][d] = 0.0;
            }
            counts[k] = 0;
        }

        changed /= numObjs;

        /* converged: stop without touching the iteration counter */
        if (!(changed > threshold))
            break;
    } while (iterations++ < 500);

    free(sums[0]);
    free(sums);
    free(counts);

    return clusters;
}

Loading

0 comments on commit 3289f8a

Please sign in to comment.