Skip to content

Commit

Permalink
cusparse for pagerank on 0.25 scale of twitter-2010
Browse files Browse the repository at this point in the history
  • Loading branch information
liuexp committed Feb 18, 2013
1 parent 23325e5 commit 9b01395
Show file tree
Hide file tree
Showing 2 changed files with 164 additions and 31 deletions.
148 changes: 117 additions & 31 deletions cusparse.cu
Expand Up @@ -7,13 +7,53 @@

//const float RANDRESET = 0.15;
const float DAMPINGFACTOR = 0.85;	// PageRank damping factor d
// NOTE(review): only one definition of mtxFile may exist — keeping both paths
// uncommented is a redefinition error. The stale path is commented out,
// matching the convention used for the other alternates below.
//const char mtxFile[] = "/media/tmp/graphchi/data/test4";
const char mtxFile[] = "/media/tmp/graphchi/data/test3";	// COO edge-list input file
const int n = 61578171;		// vertex count (matrix dimension)
const int nnz = 345439900;	// edge count (number of nonzeros)
//const int n = 4, nnz = 9;	// sizes for readSampleMatrix
const int niter = 4;		// number of power iterations

// Print a human-readable description of a CUDA runtime status code.
// On cudaSuccess it prints "success memcpy" to preserve the log format the
// existing runs produce (see pagerankCusparse0.25.txt). Any failure is
// reported via cudaGetErrorString, which covers every runtime error code —
// the old hand-written switch recognized only four codes (one of them the
// deprecated cudaErrorMixedDeviceExecution) and printed "unknown" otherwise.
void handleError(cudaError_t z){
	if(z == cudaSuccess){
		printf("success memcpy\n");
	}else{
		printf("cuda error: %s\n", cudaGetErrorString(z));
	}
}

// Strip a single trailing '\n' from s in place (e.g. after fgets).
// Guards against the empty string: the original computed l == -1 for "" and
// then read/wrote s[-1], which is out-of-bounds.
void FIXLINE(char *s){
	int l = (int)strlen(s) - 1;
	if(l >= 0 && s[l] == '\n')s[l] = 0;
}

// Fill row/col/val with a fixed 4x4 sample matrix in COO layout (9 nonzeros,
// listed in row-major order). The m parameter is accepted for interface
// symmetry with readMatrix but unused: the sample always has exactly 9 entries.
void readSampleMatrix(int *row, int *col, float *val, int m){
	static const int   sampleRow[9] = {0, 0, 0, 1, 2, 2, 2, 3, 3};
	static const int   sampleCol[9] = {0, 2, 3, 1, 0, 2, 3, 1, 3};
	static const float sampleVal[9] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
	for(int i = 0; i < 9; i++){
		row[i] = sampleRow[i];
		col[i] = sampleCol[i];
		val[i] = sampleVal[i];
	}
}

void readMatrix(int *row, int *col, float *val, int m){
FILE *fp = fopen(mtxFile,"r");
char s[1024];
Expand Down Expand Up @@ -52,21 +92,18 @@ int main(){
int *csrRow;
float *xHost, *zHost;
float *x, *y, *z;
int nnz, n;
cusparseStatus_t status;
cudaError_t cudaStat;

//time_t t0, t1;
//double diff;
clock_t tt;

n = 23026589;
nnz = 324874844;
clock_t tt,tt0;
tt0 = clock();

cooRowHostIdx = (int *) malloc(nnz * sizeof(int));
cooColHostIdx = (int *) malloc(nnz * sizeof(int));
cooValHost = (float *) malloc(nnz * sizeof(float));

readMatrix(cooRowHostIdx, cooColHostIdx, cooValHost, nnz);
//readSampleMatrix(cooRowHostIdx, cooColHostIdx, cooValHost, nnz);
xHost = (float *) malloc(n * sizeof(float));
//yHost = (float *) malloc(n * sizeof(float));
zHost = (float *) malloc(n * sizeof(float)); // the constant vector
Expand All @@ -80,16 +117,25 @@ int main(){
cudaMalloc((void **)&y, n * sizeof(float));
cudaMalloc((void **)&z, n * sizeof(float));
cudaMalloc((void**)&csrRow,(n+1)*sizeof(csrRow[0]));
printf("-- ELAPSED TIME: %.3fs\n", ((double)clock() - tt0)/CLOCKS_PER_SEC);

printf("starting memcpy\n");
printf("starting memcpy to device\n");
tt = clock();
cudaMemcpy(cooRowIdx, cooRowHostIdx, nnz * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(cooColIdx, cooColHostIdx, nnz * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(cooVal, cooValHost, nnz * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(x, xHost, n * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(z, zHost, n * sizeof(float), cudaMemcpyHostToDevice);
cudaStat = cudaMemcpy(cooRowIdx, cooRowHostIdx, nnz * sizeof(int), cudaMemcpyHostToDevice);
handleError(cudaStat);
cudaStat = cudaMemcpy(cooColIdx, cooColHostIdx, nnz * sizeof(int), cudaMemcpyHostToDevice);
handleError(cudaStat);
cudaStat = cudaMemcpy(cooVal, cooValHost, nnz * sizeof(float), cudaMemcpyHostToDevice);
handleError(cudaStat);
cudaStat = cudaMemcpy(x, xHost, n * sizeof(float), cudaMemcpyHostToDevice);
handleError(cudaStat);
cudaStat = cudaMemcpy(z, zHost, n * sizeof(float), cudaMemcpyHostToDevice);
handleError(cudaStat);
cudaStat = cudaMemcpy(y, zHost, n * sizeof(float), cudaMemcpyHostToDevice);
handleError(cudaStat);
cudaDeviceSynchronize();
printf("memcpy done in %.3fs\n", ((double)clock() - tt)/CLOCKS_PER_SEC);

printf("-- ELAPSED TIME: %.3fs\n", ((double)clock() - tt0)/CLOCKS_PER_SEC);

cusparseCreate(&handle);
cusparseCreateMatDescr(&descr);
Expand All @@ -98,30 +144,68 @@ int main(){
cusparseXcoo2csr(handle,cooRowIdx,nnz,n,csrRow,CUSPARSE_INDEX_BASE_ZERO);

const float tmpFloat1 = 1.0;

printf("starting iteration\n");
tt = clock();
// for each iteration, y<-z, y<-(1-d)*M*x + y, x <- y
cudaMemcpy(y, z, nnz * sizeof(float), cudaMemcpyDeviceToDevice);
status = cusparseScsrmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz,
&DAMPINGFACTOR, descr, cooVal, csrRow, cooColIdx, x, &tmpFloat1, y);
if(CUSPARSE_STATUS_SUCCESS != status){ //should use switch case here
printf("meow\n");
for(int i=0;i<niter;i++){
tt = clock();
// for each iteration, y<-z, y<-(1-d)*M*x + y, x <- y
cudaStat = cudaMemcpy(y, z, n * sizeof(float), cudaMemcpyDeviceToDevice);
handleError(cudaStat);
status = cusparseScsrmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz,
&DAMPINGFACTOR, descr, cooVal, csrRow, cooColIdx, x, &tmpFloat1, y);
switch(status){
case CUSPARSE_STATUS_INVALID_VALUE:
printf("invalid value");
break;
case CUSPARSE_STATUS_NOT_INITIALIZED:
printf("not initialized");
break;
case CUSPARSE_STATUS_ARCH_MISMATCH:
printf("arch mismatch");
break;
case CUSPARSE_STATUS_EXECUTION_FAILED:
printf("exe failed");
break;
case CUSPARSE_STATUS_INTERNAL_ERROR:
printf("internal error");
break;
case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
printf("not supported");
break;
case CUSPARSE_STATUS_ALLOC_FAILED:
printf("alloc failed");
break;
case CUSPARSE_STATUS_MAPPING_ERROR :
printf("map error");
break;
case CUSPARSE_STATUS_SUCCESS:
printf("success\n");
break;
default:
printf("unknown\n");
break;
}
if(CUSPARSE_STATUS_SUCCESS != status){ //should use switch case here
printf("meow\n");
}
cudaStat = cudaMemcpy(x, y, n * sizeof(float), cudaMemcpyDeviceToDevice);
handleError(cudaStat);
cudaDeviceSynchronize();
printf("iteration done in %.6fs\n", ((double)clock() - tt)/CLOCKS_PER_SEC);
printf("-- ELAPSED TIME: %.6fs\n", ((double)clock() - tt0)/CLOCKS_PER_SEC);
}
cudaMemcpy(x, y, n * sizeof(float), cudaMemcpyDeviceToDevice);
cudaDeviceSynchronize();
printf("iteration done in %.3fs\n", ((double)clock() - tt)/CLOCKS_PER_SEC);

printf("starting copying to host\n");
tt = clock();
cudaMemcpy(xHost, x, n * sizeof(float), cudaMemcpyDeviceToHost);
cudaDeviceSynchronize();
//FIXME:taking from y is probably faster than taking from x....
cudaStat = cudaMemcpy(xHost, y, n * sizeof(float), cudaMemcpyDeviceToHost);
handleError(cudaStat);
printf("memcpy done in %.3fs\n", ((double)clock() - tt)/CLOCKS_PER_SEC);
printf("-- ELAPSED TIME: %.3fs\n", ((double)clock() - tt0)/CLOCKS_PER_SEC);

for(int i=0;i<10;i++){
printf("%d\t%.3f\n", i, x[i]);
for(int i=0;i<min(n,10);i++){
printf("%d\t%.9f\t%.9f\n", i, xHost[i], zHost[i]);
}


free(xHost);
free(zHost);
free(cooColHostIdx);
Expand All @@ -134,6 +218,7 @@ int main(){
cudaFree(x);
cudaFree(y);
cudaFree(z);
printf("-- ELAPSED TIME: %.3fs\n", ((double)clock() - tt0)/CLOCKS_PER_SEC);

/* destroy matrix descriptor */
status = cusparseDestroyMatDescr(descr);
Expand All @@ -150,6 +235,7 @@ int main(){
printf("CUSPARSE Library release of resources failed");
return 1;
}
cudaDeviceReset();
return 0;
}

47 changes: 47 additions & 0 deletions pagerankCusparse0.25.txt
@@ -0,0 +1,47 @@
Read 345439900 lines matrix in 136.860s
-- ELAPSED TIME: 138.670s
starting memcpy to device
success memcpy
success memcpy
success memcpy
success memcpy
success memcpy
success memcpy
memcpy done in 0.890s
-- ELAPSED TIME: 139.560s
starting iteration
success memcpy
success
success memcpy
iteration done in 1.630000s
-- ELAPSED TIME: 141.190000s
success memcpy
success
success memcpy
iteration done in 0.280000s
-- ELAPSED TIME: 141.470000s
success memcpy
success
success memcpy
iteration done in 0.270000s
-- ELAPSED TIME: 141.740000s
success memcpy
success
success memcpy
iteration done in 0.280000s
-- ELAPSED TIME: 142.020000s
starting copying to host
success memcpy
memcpy done in 0.050s
-- ELAPSED TIME: 142.070s
0 0.000000002 0.000000002
1 0.000009863 0.000000002
2 0.000072435 0.000000002
3 0.002595886 0.000000002
4 0.087609343 0.000000002
5 0.311934739 0.000000002
6 0.001448659 0.000000002
7 0.214525610 0.000000002
8 0.001026526 0.000000002
9 0.004133295 0.000000002
-- ELAPSED TIME: 142.200s

0 comments on commit 9b01395

Please sign in to comment.