Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[misc] Remove usage of deprecated functions in benchmarks #1193

Merged
merged 1 commit into from
Jun 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,4 @@ _build
*.bin
*.gif
*.mp4
*.dat
24 changes: 6 additions & 18 deletions benchmarks/fill_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@ def benchmark_nested_struct():
a = ti.var(dt=ti.f32)
N = 512

@ti.layout
def place():
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)

@ti.kernel
def fill():
Expand All @@ -52,9 +50,7 @@ def benchmark_nested_struct_listgen_8x8():
ti.cfg.demote_dense_struct_fors = False
N = 512

@ti.layout
def place():
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)

@ti.kernel
def fill():
Expand All @@ -70,9 +66,7 @@ def benchmark_nested_struct_listgen_16x16():
ti.cfg.demote_dense_struct_fors = False
N = 256

@ti.layout
def place():
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [16, 16]).place(a)
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [16, 16]).place(a)

@ti.kernel
def fill():
Expand All @@ -87,9 +81,7 @@ def benchmark_nested_range_blocked():
a = ti.var(dt=ti.f32)
N = 512

@ti.layout
def place():
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)

@ti.kernel
def fill():
Expand All @@ -105,9 +97,7 @@ def benchmark_nested_range():
a = ti.var(dt=ti.f32)
N = 512

@ti.layout
def place():
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)

@ti.kernel
def fill():
Expand All @@ -124,9 +114,7 @@ def benchmark_root_listgen():
ti.cfg.demote_dense_struct_fors = False
N = 512

@ti.layout
def place():
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)

@ti.kernel
def fill():
Expand Down
8 changes: 2 additions & 6 deletions benchmarks/fill_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ def benchmark_nested_struct():
a = ti.var(dt=ti.f32)
N = 512

@ti.layout
def place():
ti.root.pointer(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
ti.root.pointer(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)

@ti.kernel
def fill():
Expand All @@ -25,9 +23,7 @@ def benchmark_nested_struct_fill_and_clear():
a = ti.var(dt=ti.f32)
N = 512

@ti.layout
def place():
ti.root.pointer(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
ti.root.pointer(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)

@ti.kernel
def fill():
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/mpm2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def substep():
g_v = grid_v[base + ti.Vector([i, j])]
weight = w[i][0] * w[j][1]
new_v += weight * g_v
new_C += 4 * inv_dx * weight * ti.outer_product(g_v, dpos)
new_C += 4 * inv_dx * weight * g_v.outer_product(dpos)
v[p], C[p] = new_v, new_C
x[p] += dt * v[p] # advection

Expand Down Expand Up @@ -226,7 +226,7 @@ def substep():
g_v = grid_v[base + ti.Vector([i, j])]
weight = w[i][0] * w[j][1]
new_v += weight * g_v
new_C += 4 * inv_dx * weight * ti.outer_product(g_v, dpos)
new_C += 4 * inv_dx * weight * g_v.outer_product(dpos)
v[p], C[p] = new_v, new_C
x[p] += dt * v[p] # advection

Expand Down
2 changes: 1 addition & 1 deletion misc/baselines/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ assert: assert.cu
clang++-8 -std=c++14 assert.cu -S -emit-llvm --cuda-gpu-arch=sm_61

gpu_memory_bound: gpu_memory_bound.cu
nvcc -std=c++14 gpu_memory_bound.cu -O3 -o gpu_memory_bound
nvcc -std=c++14 gpu_memory_bound.cu -O3 -o gpu_memory_bound --gpu-architecture=compute_61 --gpu-code=sm_61,compute_61

cpu_memory_bound: cpu_memory_bound.cpp
g++ -std=c++14 cpu_memory_bound.cpp -O3 -o cpu_memory_bound
Expand Down
33 changes: 22 additions & 11 deletions misc/baselines/gpu_memory_bound.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,42 @@
#include <cuda_runtime.h>
#include "get_time.h"

__global__ void cpy(float *a, float *b, int *c, int n) {
__global__ void cpy(float *a, float *b, int n) {
unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
a[i] = b[i];
if (i < n)
a[i] = b[i];
}

int main() {
int n = 1024 * 1024 * 1024 / 4;
int n = 1024 * 1024 * 1024;
float *a, *b;
int *c;
cudaMalloc(&a, n * sizeof(float));
cudaMalloc(&b, n * sizeof(float));
cudaMalloc(&c, n * sizeof(float));
for (auto bs : {16, 32, 64, 128, 256}) {

int repeat = 25;

double t;
t = get_time();
for (int i = 0; i < repeat; i++) {
cudaMemcpyAsync(a, b, n * sizeof(float), cudaMemcpyDeviceToDevice, 0);
}
cudaDeviceSynchronize();
t = (get_time() - t) / repeat;
printf("cuMemcpyAsync 8GB data bw %.3f GB/s\n",
n * 8.0 / t / (1024 * 1024 * 1024.0f));

for (auto bs : {32, 64, 128, 256}) {
for (int i = 0; i < 10; i++) {
cpy<<<n / bs, bs>>>(a, b, c, n);
cpy<<<n / bs, bs>>>(a, b, n);
}
cudaDeviceSynchronize();
int repeat = 100;
auto t = get_time();
t = get_time();
for (int i = 0; i < repeat; i++) {
cpy<<<n / bs, bs>>>(a, b, c, n);
cpy<<<n / bs, bs>>>(a, b, n);
}
cudaDeviceSynchronize();
t = (get_time() - t) / repeat;
printf("memcpy 1GB data, block_size %d, %.2f ms bw %.3f GB/s\n", bs,
printf("memcpy 8GB data, block_dim %d, %.2f ms bw %.3f GB/s\n", bs,
t * 1000, n * 8.0 / t / (1024 * 1024 * 1024.0f));
}
}