-
Notifications
You must be signed in to change notification settings - Fork 120
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The nvcc linker is stupid and won't recognize functions in other files linked with gcc or other files linked with nvcc that end in .c. So, .cu versions of all relevant files for the CUDA version have been copied from the dependencies in the sequential version. Let's hope we can fix this hack later. Note that all the CUDA source files have unique prefix names so that the object names don't overlap with those of the sequential version. This way, we don't break 'make seq'.
- Loading branch information
Showing
5 changed files
with
539 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | ||
/* File: file_io.c */ | ||
/* Description: This program reads point data from a file */ | ||
/* and writes cluster output to files */ | ||
/* Input file format: */ | ||
/* ascii file: each line contains 1 data object */ | ||
/* binary file: first 4-byte integer is the number of data */ | ||
/* objects and 2nd integer is the no. of features (or */ | ||
/* coordinates) of each object */ | ||
/* */ | ||
/* Author: Wei-keng Liao */ | ||
/* ECE Department Northwestern University */ | ||
/* email: wkliao@ece.northwestern.edu */ | ||
/* Copyright, 2005, Wei-keng Liao */ | ||
/* */ | ||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> /* strtok() */ | ||
#include <sys/types.h> /* open() */ | ||
#include <sys/stat.h> | ||
#include <fcntl.h> | ||
#include <unistd.h> /* read(), close() */ | ||
|
||
#include "kmeans.h" | ||
|
||
#define MAX_CHAR_PER_LINE 128 | ||
|
||
|
||
/*---< file_read() >---------------------------------------------------------*/ | ||
float** file_read(int isBinaryFile, /* flag: 0 or 1 */ | ||
char *filename, /* input file name */ | ||
int *numObjs, /* no. data objects (local) */ | ||
int *numCoords) /* no. coordinates */ | ||
{ | ||
float **objects; | ||
int i, j, len; | ||
ssize_t numBytesRead; | ||
|
||
if (isBinaryFile) { /* input file is in raw binary format -------------*/ | ||
int infile; | ||
if ((infile = open(filename, O_RDONLY, "0600")) == -1) { | ||
fprintf(stderr, "Error: no such file (%s)\n", filename); | ||
return NULL; | ||
} | ||
numBytesRead = read(infile, numObjs, sizeof(int)); | ||
assert(numBytesRead == sizeof(int)); | ||
numBytesRead = read(infile, numCoords, sizeof(int)); | ||
assert(numBytesRead == sizeof(int)); | ||
if (_debug) { | ||
printf("File %s numObjs = %d\n",filename,*numObjs); | ||
printf("File %s numCoords = %d\n",filename,*numCoords); | ||
} | ||
|
||
/* allocate space for objects[][] and read all objects */ | ||
len = (*numObjs) * (*numCoords); | ||
objects = (float**)malloc((*numObjs) * sizeof(float*)); | ||
assert(objects != NULL); | ||
objects[0] = (float*) malloc(len * sizeof(float)); | ||
assert(objects[0] != NULL); | ||
for (i=1; i<(*numObjs); i++) | ||
objects[i] = objects[i-1] + (*numCoords); | ||
|
||
numBytesRead = read(infile, objects[0], len*sizeof(float)); | ||
assert(numBytesRead == len*sizeof(float)); | ||
|
||
close(infile); | ||
} | ||
else { /* input file is in ASCII format -------------------------------*/ | ||
FILE *infile; | ||
char *line, *ret; | ||
int lineLen; | ||
|
||
if ((infile = fopen(filename, "r")) == NULL) { | ||
fprintf(stderr, "Error: no such file (%s)\n", filename); | ||
return NULL; | ||
} | ||
|
||
/* first find the number of objects */ | ||
lineLen = MAX_CHAR_PER_LINE; | ||
line = (char*) malloc(lineLen); | ||
assert(line != NULL); | ||
|
||
(*numObjs) = 0; | ||
while (fgets(line, lineLen, infile) != NULL) { | ||
/* check each line to find the max line length */ | ||
while (strlen(line) == lineLen-1) { | ||
/* this line read is not complete */ | ||
len = strlen(line); | ||
fseek(infile, -len, SEEK_CUR); | ||
|
||
/* increase lineLen */ | ||
lineLen += MAX_CHAR_PER_LINE; | ||
line = (char*) realloc(line, lineLen); | ||
assert(line != NULL); | ||
|
||
ret = fgets(line, lineLen, infile); | ||
assert(ret != NULL); | ||
} | ||
|
||
if (strtok(line, " \t\n") != 0) | ||
(*numObjs)++; | ||
} | ||
rewind(infile); | ||
if (_debug) printf("lineLen = %d\n",lineLen); | ||
|
||
/* find the no. objects of each object */ | ||
(*numCoords) = 0; | ||
while (fgets(line, lineLen, infile) != NULL) { | ||
if (strtok(line, " \t\n") != 0) { | ||
/* ignore the id (first coordiinate): numCoords = 1; */ | ||
while (strtok(NULL, " ,\t\n") != NULL) (*numCoords)++; | ||
break; /* this makes read from 1st object */ | ||
} | ||
} | ||
rewind(infile); | ||
if (_debug) { | ||
printf("File %s numObjs = %d\n",filename,*numObjs); | ||
printf("File %s numCoords = %d\n",filename,*numCoords); | ||
} | ||
|
||
/* allocate space for objects[][] and read all objects */ | ||
len = (*numObjs) * (*numCoords); | ||
objects = (float**)malloc((*numObjs) * sizeof(float*)); | ||
assert(objects != NULL); | ||
objects[0] = (float*) malloc(len * sizeof(float)); | ||
assert(objects[0] != NULL); | ||
for (i=1; i<(*numObjs); i++) | ||
objects[i] = objects[i-1] + (*numCoords); | ||
|
||
i = 0; | ||
/* read all objects */ | ||
while (fgets(line, lineLen, infile) != NULL) { | ||
if (strtok(line, " \t\n") == NULL) continue; | ||
for (j=0; j<(*numCoords); j++) | ||
objects[i][j] = atof(strtok(NULL, " ,\t\n")); | ||
i++; | ||
} | ||
|
||
fclose(infile); | ||
free(line); | ||
} | ||
|
||
return objects; | ||
} | ||
|
||
/*---< file_write() >---------------------------------------------------------*/ | ||
/* Write the clustering result to two text files derived from `filename`:
 *   <filename>.cluster_centres  one line per cluster: id, then coordinates
 *   <filename>.membership       one line per object: object id, cluster id
 *
 * Returns 1 on success. Returns 0, after printing a message to stderr, if
 * an output name does not fit the name buffer or a file cannot be opened
 * or closed.                                                               */
int file_write(char      *filename,     /* base name of the output files */
               int        numClusters,  /* no. clusters */
               int        numObjs,      /* no. data objects */
               int        numCoords,    /* no. coordinates (local) */
               float    **clusters,     /* [numClusters][numCoords] centers */
               int       *membership)   /* [numObjs] */
{
    FILE *fptr;
    int   i, j, nameLen;
    char  outFileName[1024];

    /* output: the coordinates of the cluster centres ----------------------*/
    /* snprintf bounds the write; a result >= buffer size means truncation */
    nameLen = snprintf(outFileName, sizeof(outFileName),
                       "%s.cluster_centres", filename);
    if (nameLen < 0 || (size_t)nameLen >= sizeof(outFileName)) {
        fprintf(stderr, "Error: output file name is too long\n");
        return 0;
    }
    printf("Writing coordinates of K=%d cluster centers to file \"%s\"\n",
           numClusters, outFileName);
    fptr = fopen(outFileName, "w");
    if (fptr == NULL) {
        fprintf(stderr, "Error: cannot open file (%s) for writing\n",
                outFileName);
        return 0;
    }
    for (i=0; i<numClusters; i++) {
        fprintf(fptr, "%d ", i);
        for (j=0; j<numCoords; j++)
            fprintf(fptr, "%f ", clusters[i][j]);
        fprintf(fptr, "\n");
    }
    /* fclose flushes buffered output, so a failure here means lost data */
    if (fclose(fptr) != 0) {
        fprintf(stderr, "Error: cannot finish writing file (%s)\n",
                outFileName);
        return 0;
    }

    /* output: the closest cluster centre to each of the data points --------*/
    nameLen = snprintf(outFileName, sizeof(outFileName),
                       "%s.membership", filename);
    if (nameLen < 0 || (size_t)nameLen >= sizeof(outFileName)) {
        fprintf(stderr, "Error: output file name is too long\n");
        return 0;
    }
    printf("Writing membership of N=%d data objects to file \"%s\"\n",
           numObjs, outFileName);
    fptr = fopen(outFileName, "w");
    if (fptr == NULL) {
        fprintf(stderr, "Error: cannot open file (%s) for writing\n",
                outFileName);
        return 0;
    }
    for (i=0; i<numObjs; i++)
        fprintf(fptr, "%d %d\n", i, membership[i]);
    if (fclose(fptr) != 0) {
        fprintf(stderr, "Error: cannot finish writing file (%s)\n",
                outFileName);
        return 0;
    }

    return 1;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | ||
/* File: seq_kmeans.c (sequential version) */ | ||
/* Description: Implementation of simple k-means clustering algorithm */ | ||
/* This program takes an array of N data objects, each with */ | ||
/* M coordinates and performs a k-means clustering given a */ | ||
/* user-provided value of the number of clusters (K). The */ | ||
/* clustering results are saved in 2 arrays: */ | ||
/* 1. a returned array of size [K][M] indicating the center */ | ||
/* coordinates of K clusters */ | ||
/* 2. membership[N] stores the cluster center ids, each */ | ||
/* corresponding to the cluster a data object is assigned */ | ||
/* */ | ||
/* Author: Wei-keng Liao */ | ||
/* ECE Department, Northwestern University */ | ||
/* email: wkliao@ece.northwestern.edu */ | ||
/* Copyright, 2005, Wei-keng Liao */ | ||
/* */ | ||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
#include "kmeans.h" | ||
|
||
|
||
/*----< euclid_dist_2() >----------------------------------------------------*/ | ||
/* square of Euclid distance between two multi-dimensional points */ | ||
__inline static
float euclid_dist_2(int    numdims,  /* no. dimensions */
                    float *coord1,   /* [numdims] */
                    float *coord2)   /* [numdims] */
{
    /* Sum the squared per-dimension differences; no square root is taken,
       so the result is the squared distance. */
    float sum = 0.0;
    int   d   = 0;

    while (d < numdims) {
        float diff = coord1[d] - coord2[d];
        sum += diff * diff;
        d++;
    }

    return sum;
}
|
||
/*----< find_nearest_cluster() >---------------------------------------------*/ | ||
__inline static
int find_nearest_cluster(int     numClusters, /* no. clusters */
                         int     numCoords,   /* no. coordinates */
                         float  *object,      /* [numCoords] */
                         float **clusters)    /* [numClusters][numCoords] */
{
    /* Start with cluster 0 as the best candidate, then scan the rest.
       Squared distances are compared, so no square root is needed; on a
       tie the lowest cluster index wins. */
    int   best     = 0;
    float bestDist = euclid_dist_2(numCoords, object, clusters[0]);
    int   k;

    for (k = 1; k < numClusters; ++k) {
        float candidate = euclid_dist_2(numCoords, object, clusters[k]);

        if (!(candidate < bestDist))
            continue;               /* not strictly closer: keep current */

        bestDist = candidate;
        best     = k;
    }

    return best;
}
|
||
/*----< seq_kmeans() >-------------------------------------------------------*/ | ||
/* return an array of cluster centers of size [numClusters][numCoords] */ | ||
/* Cluster `objects` into `numClusters` groups.
 *
 * The first numClusters objects seed the cluster centers. Each pass assigns
 * every object to its nearest center and then replaces each center by the
 * mean of the objects assigned to it; a center that attracted no objects
 * keeps its previous position. Passes repeat while the fraction of objects
 * that changed cluster exceeds `threshold`, up to a fixed pass limit.
 *
 * On return membership[i] holds the cluster index of object i.
 *
 * Returns a [numClusters][numCoords] matrix of centers stored in one
 * contiguous slab; the caller releases it with
 * free(clusters[0]); free(clusters);. Returns NULL if an argument is
 * invalid (NULL pointer, non-positive numCoords or numClusters, or fewer
 * objects than clusters) or if memory runs out.                            */
float** seq_kmeans(float **objects,      /* in: [numObjs][numCoords] */
                   int     numCoords,    /* no. features */
                   int     numObjs,      /* no. objects */
                   int     numClusters,  /* no. clusters */
                   float   threshold,    /* % objects change membership */
                   int    *membership)   /* out: [numObjs] */
{
    const int maxLoops = 500;   /* upper bound on refinement passes */
    int      i, j, index, loop=0;
    int     *newClusterSize; /* [numClusters]: no. objects assigned in each
                                new cluster */
    float    delta;          /* % of objects change their clusters */
    float  **clusters;       /* out: [numClusters][numCoords] */
    float  **newClusters;    /* [numClusters][numCoords] */
    float   *centerSlab;     /* contiguous storage behind clusters[] */
    float   *sumSlab;        /* contiguous storage behind newClusters[] */
    size_t   nCenters, nDims;

    /* Seeding copies objects[0..numClusters-1], so there must be at least
       that many objects; this also guarantees numObjs > 0 for the division
       at the end of each pass. */
    if (objects == NULL || membership == NULL ||
        numCoords <= 0 || numClusters <= 0 || numObjs < numClusters)
        return NULL;

    /* size arithmetic in size_t, with an explicit overflow check */
    nCenters = (size_t)numClusters;
    nDims    = (size_t)numCoords;
    if (nDims > ((size_t)-1) / sizeof(float) / nCenters)
        return NULL;

    /* clusters[] is the returned matrix; newClusters[] and newClusterSize[]
       accumulate per-cluster sums and counts and must start at zero */
    clusters       = (float**) malloc(nCenters * sizeof(float*));
    newClusters    = (float**) malloc(nCenters * sizeof(float*));
    newClusterSize = (int*)    calloc(nCenters, sizeof(int));
    centerSlab     = (float*)  malloc(nCenters * nDims * sizeof(float));
    sumSlab        = (float*)  calloc(nCenters * nDims, sizeof(float));
    if (clusters == NULL || newClusters == NULL || newClusterSize == NULL ||
        centerSlab == NULL || sumSlab == NULL) {
        fprintf(stderr, "Error: seq_kmeans() is out of memory\n");
        free(sumSlab);
        free(centerSlab);
        free(newClusterSize);
        free(newClusters);
        free(clusters);
        return NULL;
    }

    clusters[0]    = centerSlab;
    newClusters[0] = sumSlab;
    for (i=1; i<numClusters; i++) {
        clusters[i]    = clusters[i-1]    + numCoords;
        newClusters[i] = newClusters[i-1] + numCoords;
    }

    /* pick first numClusters elements of objects[] as initial cluster centers*/
    for (i=0; i<numClusters; i++)
        for (j=0; j<numCoords; j++)
            clusters[i][j] = objects[i][j];

    /* initialize membership[]: -1 matches no cluster, so every object counts
       as changed on the first pass */
    for (i=0; i<numObjs; i++) membership[i] = -1;

    do {
        delta = 0.0;
        for (i=0; i<numObjs; i++) {
            /* find the array index of nearest cluster center */
            index = find_nearest_cluster(numClusters, numCoords, objects[i],
                                         clusters);

            /* if membership changes, increase delta by 1 */
            if (membership[i] != index) delta += 1.0;

            /* assign the membership to object i */
            membership[i] = index;

            /* update new cluster centers : sum of objects located within */
            newClusterSize[index]++;
            for (j=0; j<numCoords; j++)
                newClusters[index][j] += objects[i][j];
        }

        /* average the sum and replace old cluster centers with newClusters */
        for (i=0; i<numClusters; i++) {
            for (j=0; j<numCoords; j++) {
                if (newClusterSize[i] > 0)
                    clusters[i][j] = newClusters[i][j] / newClusterSize[i];
                newClusters[i][j] = 0.0;   /* set back to 0 */
            }
            newClusterSize[i] = 0;   /* set back to 0 */
        }

        delta /= numObjs;
    } while (delta > threshold && loop++ < maxLoops);

    free(newClusters[0]);
    free(newClusters);
    free(newClusterSize);

    return clusters;
}
|
Oops, something went wrong.