Skip to content

Commit

Permalink
[async] Set the default value of async_flush_every to 50 (#2169)
Browse files: browse the repository at this point in the history
  • Loading branch information
xumingkuan committed Jan 23, 2021
1 parent 368157a commit 1b73055
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 15 deletions.
2 changes: 1 addition & 1 deletion benchmarks/async_advection.py
Expand Up @@ -8,7 +8,7 @@

@benchmark_async
def simple_advection(scale):
n = 128 * 2**int((math.log(scale, 2)) // 2)
n = 256 * 2**int((math.log(scale, 2)) // 2)
x = ti.Vector.field(3, dtype=ti.f32, shape=(n, n))
new_x = ti.Vector.field(3, dtype=ti.f32, shape=(n, n))
v = ti.Vector.field(2, dtype=ti.f32, shape=(n, n))
Expand Down
7 changes: 4 additions & 3 deletions benchmarks/async_cases.py
Expand Up @@ -165,8 +165,9 @@ def task():

@benchmark_async
def mpm_splitted(scale):
quality = int(scale**(1 /
3)) # Use a larger value for higher-res simulations
quality = int(3 * scale**(1 / 3))
# Use a larger value for higher-res simulations

n_particles, n_grid = 9000 * quality**2, 128 * quality
dx, inv_dx = 1 / n_grid, float(n_grid)
dt = 1e-4 / quality
Expand Down Expand Up @@ -294,7 +295,7 @@ def task():
for s in range(int(2e-3 // dt)):
substep()

ti.benchmark(task, repeat=10)
ti.benchmark(task, repeat=5)


@benchmark_async
Expand Down
14 changes: 12 additions & 2 deletions benchmarks/benchmark_async.py
Expand Up @@ -21,6 +21,16 @@

ti.benchmark_plot(fn='benchmark.yml',
cases=case_names,
archs=['x64', 'cuda'],
columns=[
'wall_clk_t', 'exec_t', 'launched_tasks',
'compiled_inst', 'compiled_tasks'
],
column_titles=[
'Wall-clock time', 'Backend time', 'Tasks launched',
'Instructions emitted', 'Tasks compiled'
],
archs=['cuda', 'x64'],
title='Whole-Program Optimization Microbenchmarks',
bars='sync_vs_async',
left_margin=0.2)
left_margin=0.2,
size=(11.5, 9))
5 changes: 4 additions & 1 deletion benchmarks/utils.py
Expand Up @@ -9,7 +9,10 @@ def body():
for arch in [ti.cpu, ti.cuda]:
for async_mode in [True, False]:
os.environ['TI_CURRENT_BENCHMARK'] = func.__name__
ti.init(arch=arch, async_mode=async_mode, kernel_profiler=True)
ti.init(arch=arch,
async_mode=async_mode,
kernel_profiler=True,
verbose=False)
if arch == ti.cpu:
scale = 2
else:
Expand Down
21 changes: 14 additions & 7 deletions python/taichi/lang/__init__.py
Expand Up @@ -371,20 +371,23 @@ def run_benchmark():
avg = elapsed / repeat
ti.stat_write('wall_clk_t', avg)
device_time = ti.kernel_profiler_total_time()
ti.stat_write('exec_t', device_time)
avg_device_time = device_time / repeat
ti.stat_write('exec_t', avg_device_time)

run_benchmark()


def benchmark_plot(fn=None,
cases=None,
columns=None,
column_titles=None,
archs=None,
title=None,
bars='sync_vs_async',
bar_width=0.4,
bar_distance=0,
left_margin=0):
left_margin=0,
size=(12, 8)):
import taichi as ti
import yaml
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -412,13 +415,15 @@ def benchmark_plot(fn=None,

if columns is None:
columns = list(data[cases[0]].keys())
if column_titles is None:
column_titles = columns
normalize_to_lowest = lambda x: True
figure, subfigures = plt.subplots(len(cases), len(columns))
if title is None:
title = 'Taichi Performance Benchmarks (Higher means more)'
figure.suptitle(title, fontweight="bold")
for col_id in range(len(columns)):
subfigures[0][col_id].set_title(columns[col_id])
subfigures[0][col_id].set_title(column_titles[col_id])
for case_id in range(len(cases)):
case = cases[case_id]
subfigures[case_id][0].annotate(
Expand All @@ -435,7 +440,9 @@ def benchmark_plot(fn=None,
if archs is None:
current_archs = data[case][col].keys()
else:
current_archs = archs & data[case][col].keys()
current_archs = [
x for x in archs if x in data[case][col].keys()
]
if bars == 'sync_vs_async':
y_left = [
data[case][col][arch]['sync'] for arch in current_archs
Expand Down Expand Up @@ -480,23 +487,23 @@ def benchmark_plot(fn=None,
height=y_left,
width=bar_width,
label=label_left,
color=(0.3, 0.7, 0.9, 1.0))
color=(0.47, 0.69, 0.89, 1.0))
bar_right = ax.bar(x=[
i + bar_width / 2 + bar_distance / 2
for i in range(len(current_archs))
],
height=y_right,
width=bar_width,
label=label_right,
color=(0.8, 0.2, 0.3, 1.0))
color=(0.68, 0.26, 0.31, 1.0))
ax.set_xticks(range(len(current_archs)))
ax.set_xticklabels(current_archs)
figure.legend((bar_left, bar_right), (label_left, label_right),
loc='lower center')
figure.subplots_adjust(left=left_margin)

fig = plt.gcf()
fig.set_size_inches(13, 8)
fig.set_size_inches(size)

plt.show()

Expand Down
3 changes: 2 additions & 1 deletion taichi/program/compile_config.h
Expand Up @@ -70,7 +70,8 @@ struct CompileConfig {
bool async_opt_dse{true};
bool async_listgen_fast_filtering{true};
std::string async_opt_intermediate_file;
int async_flush_every{0};
// Setting 0 effectively means do not automatically flush
int async_flush_every{50};
// Setting 0 effectively means unlimited
int async_max_fuse_per_task{1};

Expand Down

0 comments on commit 1b73055

Please sign in to comment.