Skip to content

Commit

Permalink
Added UVA write test
Browse files Browse the repository at this point in the history
  • Loading branch information
tbennun committed Feb 7, 2016
1 parent 67789ff commit 8fe9420
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 13 deletions.
1 change: 1 addition & 0 deletions TESTS.md
Expand Up @@ -17,6 +17,7 @@ Level-1
* UVA: Tests DMA (Direct Memory Access) between each GPU and the host/other GPUs.
* Special flags:
* `--fullduplex`: Performs exchanges instead of uni-directional transfers
* `--write`: Performs DMA writes instead of reads
* `--from`: Specify a single source to copy from (0 for the host, 1...N for a specific GPU, or -1 for all sources)
* `--to`: Specify a single target GPU to copy to (1...N for a specific GPU or -1 for all GPUs)

Expand Down
16 changes: 11 additions & 5 deletions run.sh
Expand Up @@ -39,19 +39,25 @@ fi
echo "L1 Tests"
echo "--------"

echo "1/5 Half-duplex (unidirectional) memory copy"
echo "1/7 Half-duplex (unidirectional) memory copy"
./build/halfduplex > l1-halfduplex.log

echo "2/5 Full-duplex (bidirectional) memory copy"
echo "2/7 Full-duplex (bidirectional) memory copy"
./build/fullduplex > l1-fullduplex.log

echo "3/5 Half-duplex DMA"
echo "3/7 Half-duplex DMA Read"
./build/uva > l1-uvahalf.log

echo "4/5 Full-duplex DMA"
echo "4/7 Full-duplex DMA Read"
./build/uva --fullduplex > l1-uvafull.log

echo "5/5 Scaling"
echo "5/7 Half-duplex DMA Write"
./build/uva --write > l1-uvawhalf.log

echo "6/7 Full-duplex DMA Write"
./build/uva --write --fullduplex > l1-uvawfull.log

echo "7/7 Scaling"
./build/sgemm -n 4096 -k 4096 -m 4096 --repetitions=100 --regression=false --scaling=true > l1-scaling.log

# Run L2 tests
Expand Down
39 changes: 31 additions & 8 deletions src/L1/uva.cu
Expand Up @@ -41,6 +41,7 @@ DEFINE_uint64(type_size, sizeof(float), "The size of the data chunk to "
DEFINE_uint64(repetitions, 100, "Number of repetitions to average");
DEFINE_uint64(block_size, 32, "Copy kernel block size");
DEFINE_bool(fullduplex, false, "True for bi-directional copy");
DEFINE_bool(write, false, "Perform DMA write instead of read");

DEFINE_int32(from, -1, "Only copy from a single GPU index/host (Host is "
"0, GPUs start from 1), or -1 for all");
Expand Down Expand Up @@ -144,7 +145,13 @@ void CopySegmentUVA(int a, int b)
size_t sz = FLAGS_size / FLAGS_type_size, typesize = FLAGS_type_size;
dim3 block_dim (FLAGS_block_size),
grid_dim((sz + FLAGS_block_size - 1) / FLAGS_block_size);


// If using UVA to write, simply swap the buffers
if (FLAGS_write)
{
std::swap(deva_buff, devb_buff);
std::swap(deva_buff2, devb_buff2);
}

// Copy or Exchange using UVA
auto t1 = std::chrono::high_resolution_clock::now();
Expand Down Expand Up @@ -182,6 +189,15 @@ void CopySegmentUVA(int a, int b)
double MBps = (FLAGS_size / 1024.0 / 1024.0) / (mstime / 1000.0);

printf("%.2lf MB/s (%lf ms)\n", MBps, mstime);


// Swap buffers back, if necessary
if (FLAGS_write)
{
std::swap(deva_buff, devb_buff);
std::swap(deva_buff2, devb_buff2);
}


// Free buffers
if (a > 0)
Expand Down Expand Up @@ -283,17 +299,24 @@ int main(int argc, char **argv)

if (FLAGS_fullduplex)
{
if (i == 0)
printf("Exchanging between host and GPU %d: ", j - 1);
else
printf("Exchanging between GPU %d and GPU %d: ", i - 1, j - 1);
printf("Exchanging between GPU %d and GPU %d: ", i - 1, j - 1);
}
else
{
if (i == 0)
printf("Copying from host to GPU %d: ", j - 1);
if (!FLAGS_write)
{
if (i == 0)
printf("Copying from host to GPU %d: ", j - 1);
else
printf("Copying from GPU %d to GPU %d: ", i - 1, j - 1);
}
else
printf("Copying from GPU %d to GPU %d: ", i - 1, j - 1);
{
if (i == 0)
printf("Copying from GPU %d to host: ", j - 1);
else
printf("Copying from GPU %d to GPU %d: ", j - 1, i - 1);
}
}

// Make sure that DMA access is possible
Expand Down

0 comments on commit 8fe9420

Please sign in to comment.