In [1]:
import analyzer
from snowflake.nodes import *
from snowflake.vector import Vector
from ctree.frontend import dump
from snowflake.stencil_compiler import CCompiler
import time
import timeit
%pylab inline
!ctree -cc

Populating the interactive namespace from numpy and matplotlib
ctree looking for relative cache directories named compiled, checking directories under this one



`%matplotlib` prevents importing * from pylab and numpy


In [2]:
# Benchmark configuration.
# The test mesh is allocated with SIZE + 2 points along every axis
# (presumably SIZE interior points plus a one-cell border on each side -- TODO confirm).
SIZE = 160
# Number of dimensions of the stencil and of the test mesh.
NDIM = 4
# Parameter forwarded to analyzer.decompose / analyzer.repackage; its exact
# meaning is defined by the analyzer module, which is not visible here.
DECOMP = 2
# Number of times the stencil (or the repackaged stencil body) is repeated
# inside each compiled StencilGroup.
ITER = 100

In [3]:
def create_component(ndim, dim):
    """Build the stencil contribution of a single axis.

    Along axis ``dim`` the returned expression is

        B[i] * (A[i] - A[i-1]) + B[i+1] * (A[i+1] - A[i])

    where ``A`` is the grid named "mesh" and ``B`` is the grid named
    "beta_<dim>".

    Args:
        ndim: total number of dimensions of the grids.
        dim: index of the axis this component differentiates along.

    Returns:
        The sum of the two beta-weighted difference components.
    """
    beta_name = "beta_{}".format(dim)

    def origin():
        # Fresh zero offset each time, exactly as the inline calls did.
        return Vector.zero_vector(ndim)

    def step():
        # Fresh unit offset along the axis being differenced.
        return Vector.unit_vector(dim, ndim)

    # Backward difference A[i] - A[i-1], weighted by beta at the centre point.
    backward_weights = SparseWeightArray({origin(): 1, -step(): -1})
    beta_at_centre = SparseWeightArray({origin(): 1})
    lower_diff = StencilComponent("mesh", backward_weights)
    lower_diff *= StencilComponent(beta_name, beta_at_centre)

    # Forward difference A[i+1] - A[i], weighted by beta one step up the axis.
    forward_weights = SparseWeightArray({origin(): -1, step(): 1})
    beta_at_next = SparseWeightArray({step(): 1})
    upper_diff = StencilComponent("mesh", forward_weights)
    upper_diff *= StencilComponent(beta_name, beta_at_next)

    return lower_diff + upper_diff


def create_stencil(ndim):
    """Assemble the full ``ndim``-dimensional stencil.

    One component is created per axis (using beta_0 .. beta_{ndim-1}) and the
    components are summed into a single expression that reads "mesh" and
    writes "out".

    Args:
        ndim: number of dimensions of the grids.

    Returns:
        A Stencil over the summed per-axis components.
    """
    per_axis = [create_component(ndim, axis) for axis in range(ndim)]
    # Same (1, -1, 1) triple for every axis; presumably (start, stop, stride),
    # i.e. skip one boundary cell at each end -- TODO confirm against Stencil.
    iteration_space = [(1, -1, 1) for _ in range(ndim)]
    return Stencil(sum(per_axis), "out", iteration_space, "mesh")

In [4]:
# Build the NDIM-dimensional stencil; compiled unchanged, it is the naive baseline.
sten = create_stencil(NDIM)

In [5]:
# Decompose the stencil with the local `analyzer` module, parameterised by DECOMP.
# NOTE(review): within this notebook `decomp` is only referenced by the
# commented-out dump loop; nothing visible here consumes it afterwards.
decomp = analyzer.decompose(sten, DECOMP)

In [6]:
# for d in decomp:
#     print(dump(d))
#     print("\n"*3)

In [7]:
# Produce the repackaged ("optimized") form of the stencil; its `.body` is
# what the optimized kernel is compiled from.
optimized = analyzer.repackage(sten, NDIM, DECOMP)
# print(dump(optimized))

In [8]:
# One compiler instance per variant, so that settings such as tile_size can be
# tuned independently for the naive and the optimized kernel.
# NOTE(review): the disabled tile sizes below are 3-tuples while NDIM is 4 --
# confirm the expected length before re-enabling them.
naive_compiler = CCompiler()
# naive_compiler.tile_size = (107, 107, 107)
optimized_compiler = CCompiler()
# optimized_compiler.tile_size = (86, 86, 86)

In [9]:
# Each kernel is compiled from a StencilGroup holding ITER repetitions:
# the baseline repeats `sten` itself, the optimized variant repeats the
# sequence `optimized.body`.
# NOTE(review): every repetition appears to read "mesh" and write "out", so the
# repeats redo the same work rather than iterate on the result -- confirm that
# this is the intended benchmark workload.
naive_kernel = naive_compiler.compile(StencilGroup([sten]*ITER))
optimized_kernel = optimized_compiler.compile(StencilGroup(optimized.body*ITER))

In [10]:
# Input grid: NDIM axes of SIZE + 2 points each, filled with 0, 1, 2, ... in
# row-major order.
# np.float64 replaces np.float, which was only an alias for the builtin float
# (deprecated in NumPy 1.20, removed in 1.24); the resulting dtype is unchanged.
test_in = np.arange((SIZE + 2)**NDIM, dtype=np.float64).reshape((SIZE+2,)*NDIM)

In [11]:
# Two independent zero-filled result buffers shaped like the input:
# one for the naive kernel, one for the optimized kernel.
test_out, test_opt_out = np.zeros_like(test_in), np.zeros_like(test_in)

In [12]:
# Positional arguments: the mesh, then one beta grid per dimension, then the
# output buffer. The same array serves as the mesh and as every beta grid, so
# it appears NDIM + 1 times in a row.
naive_args = [test_in] * (NDIM + 1) + [test_out]
opt_args = [test_in] * (NDIM + 1) + [test_opt_out]

# First invocation of each kernel, before any timing is done.
naive_kernel(*naive_args)
optimized_kernel(*opt_args)

In [13]:
# Display the argument names, in order, that the naive kernel expects.
naive_kernel.arg_spec

('mesh', 'beta_0', 'beta_1', 'beta_2', 'beta_3', 'out')

In [14]:
# Display the argument names, in order, that the optimized kernel expects.
optimized_kernel.arg_spec

('mesh', 'beta_0', 'beta_1', 'beta_2', 'beta_3', 'out')

In [15]:

# Number of back-to-back calls timed for each kernel.
N_TIMING_RUNS = 10


def _total_runtime(kernel, args, repeats=N_TIMING_RUNS):
    """Return the total seconds spent in `repeats` consecutive kernel(*args) calls.

    timeit.default_timer is used instead of time.time(): it selects the best
    available benchmarking clock for the running Python version, whereas
    time.time() is the adjustable wall clock and can jump during a
    multi-minute measurement.
    """
    start = timeit.default_timer()
    for _ in range(repeats):
        kernel(*args)
    return timeit.default_timer() - start


# Both kernels have already been invoked once, so any one-time start-up cost
# is excluded from these totals.
t_naive = _total_runtime(naive_kernel, naive_args)
t_opt = _total_runtime(optimized_kernel, opt_args)

In [16]:
# Total seconds for the 10 timed calls of each kernel: naive first, then optimized.
print(t_naive)
print(t_opt)

264.200270891
262.018288851
