diff --git a/docs/lang/articles/advanced/quant.md b/docs/lang/articles/advanced/quant.md new file mode 100644 index 0000000000000..3ed4972b42f60 --- /dev/null +++ b/docs/lang/articles/advanced/quant.md @@ -0,0 +1,249 @@ +--- +sidebar_position: 3 +--- + +# Using quantized data types + +High-resolution simulations can deliver great visual quality, but they are often +limited by available memory, especially on GPUs. For the sake of saving memory, +Taichi provides low-precision ("quantized") data types. You can define your own integers, +fixed-point numbers or floating-point numbers with non-standard number of bits so +that you can choose a proper setting with minimum memory for your applications. +Taichi provides a suite of tailored domain-specific optimizations to ensure the +runtime performance with quantized data types close to that with full-precision +data types. + +:::note +Quantized data types are only supported on CPU and CUDA backends for now. +::: + +## Quantized data types + +### Quantized integers + +Modern computers represent integers using the [two's complement](https://en.wikipedia.org/wiki/Two%27s_complement) +format. *Quantized integers* in Taichi adopt the same format, and can contain +non-standard number of bits: + +```python +i10 = ti.types.quant.int(bits=10) # 10-bit signed (default) integer type +u5 = ti.types.quant.int(bits=5, signed=False) # 5-bit unsigned integer type +``` + +### Quantized fixed-point numbers + +[Fixed-point numbers](https://en.wikipedia.org/wiki/Fixed-point_arithmetic) are +an old way to represent real numbers. The internal representation of a fixed-point number is simply an integer, and +its actual value equals to the integer multiplied by a predefined scaling +factor. 
Based on the support for quantized integers, Taichi provides *quantized +fixed-point numbers* as follows: + +```python +fixed_type_a = ti.types.quant.fixed(bits=10, max_value=20.0) # 10-bit signed (default) fixed-point type within [-20.0, 20.0] +fixed_type_b = ti.types.quant.fixed(bits=5, signed=False, max_value=100.0) # 5-bit unsigned fixed-point type within [0.0, 100.0] +fixed_type_c = ti.types.quant.fixed(bits=6, signed=False, scale=1.0) # 6-bit unsigned fixed-point type within [0, 64.0] +``` + +`scale` is the scaling factor mentioned above. Because fixed-point numbers are +especially useful when you know the actual value is guaranteed to be within a +range, Taichi allows you to simply set `max_value` and will calculate the +scaling factor accordingly. + +### Quantized floating-point numbers + +[Floating-point numbers](https://en.wikipedia.org/wiki/Floating-point_arithmetic) +are the standard way to represent real numbers on modern computers. A +floating-point number is composed of exponent bits, fraction bits, and a sign +bit. There are various floating-point formats: + +![image](../static/assets/floating-point_formats.png) + +In Taichi, you can define a *quantized floating-point number* with arbitrary +combination of exponent bits and fraction bits (the sign bit is made part of +fraction bits): + +```python +float_type_a = ti.types.quant.float(exp=5, frac=10) # 15-bit signed (default) floating-point type with 5 exponent bits +float_type_b = ti.types.quant.float(exp=6, frac=9, signed=False) # 15-bit unsigned floating-point type with 6 exponent bits +``` + +### Compute types + +All the parameters you've seen above are specifying the *storage type* of a +quantized data type. However, most quantized data types have no native support +on hardware, so an actual value of that quantized data type needs to convert to +a primitive type ("*compute type*") when it is involved in computation. 
+ +The default compute type for quantized integers is `ti.i32`, while the default +compute type for quantized fixed-point/floating-point numbers is `ti.f32`. You +can change the compute type by specifying the `compute` parameter: + +```python +i21 = ti.types.quant.int(bits=21, compute=ti.i64) +bfloat16 = ti.types.quant.float(exp=8, frac=8, compute=ti.f32) +``` + +## Data containers for quantized data types + +Because the storage types are not primitive types, you may now wonder how +quantized data types can work together with data containers that Taichi +provides. In fact, some special constructs are introduced to eliminate the gap. + +### Bitpacked fields + +`ti.BitpackedFields` packs a group of fields whose `dtype`s are +quantized data types together so that they are stored with one primitive type. +You can then place a `ti.BitpackedFields` instance under any SNode as if each member field +is placed individually. + +```python +a = ti.field(float_type_a) # 15 bits +b = ti.field(fixed_type_b) # 5 bits +c = ti.field(fixed_type_c) # 6 bits +d = ti.field(u5) # 5 bits +bitpack = ti.BitpackedFields(max_num_bits=32) +bitpack.place(a, b, c, d) # 31 out of 32 bits occupied +ti.root.dense(ti.i, 10).place(bitpack) +``` + +#### Shared exponent + +When multiple fields with quantized floating-point types are packed together, +there is a chance that they can share a common exponent. For example, in a 3D +velocity vector, if you know the x-component has a much larger absolute value +compared to y- and z-components, then you probably do not care about the exact +value of the y- and z-components. In this case, using a shared exponent can +leave more bits for components with larger absolute values. You can use +`place(x, y, z, shared_exponent=True)` to make fields `x, y, z` share a common +exponent. + +#### Your first program + +You probably cannot wait to write your first Taichi program with quantized data +types. 
The easiest way is to modify the data definitions of an existing example. +Assume you want to save memory for +[examples/simulation/euler.py](https://github.com/taichi-dev/taichi/blob/master/python/taichi/examples/simulation/euler.py). +Because most data definitions in the example are similar, here only field `Q` is +used for illustration: + +```python +Q = ti.Vector.field(4, dtype=ti.f32, shape=(N, N)) +``` + +An element of `Q` now occupies 4 x 32 = 128 bits. If you can fit it in +64 bits, then the memory usage is halved. A direct and first attempt is to +use quantized floating-point numbers with a shared exponent: + +```python +float_type_c = ti.types.quant.float(exp=8, frac=14) +Q_old = ti.Vector.field(4, dtype=float_type_c) +bitpack = ti.BitpackedFields(max_num_bits=64) +bitpack.place(Q_old, shared_exponent=True) +ti.root.dense(ti.ij, (N, N)).place(bitpack) +``` + +Surprisingly, you find that there is no obvious difference in visual effects +after the change, and you now successfully finish a Taichi program with +quantized data types! More attempts are left to you. + +#### More complicated quantization schemes + +Here comes a more complicated scenario. In a 3D Eulerian fluid simulation, a +voxel may need to store a 3D vector for velocity, and an integer value for cell +category with three possible values: "source", "Dirichlet boundary", and +"Neumann boundary". You can actually store all information with a single 32-bit +`ti.BitpackedFields`: + +```python +velocity_component_type = ti.types.quant.float(exp=6, frac=8, compute=ti.f32) +velocity = ti.Vector.field(3, dtype=velocity_component_type) + +# Since there are only three cell categories, 2 bits are enough. +cell_category_type = ti.types.quant.int(bits=2, signed=False, compute=ti.i32) +cell_category = ti.field(dtype=cell_category_type) + +voxel = ti.BitpackedFields(max_num_bits=32) +# Place three components of velocity into the voxel, and let them share the exponent. 
+voxel.place(velocity, shared_exponent=True) +# Place the 2-bit cell category. +voxel.place(cell_category) +# Create 512 x 512 x 256 voxels. +ti.root.dense(ti.ijk, (512, 512, 256)).place(voxel) +``` + +The compression scheme above allows you to store 13 bytes (4B x 3 + 1B) into +just 4 bytes. Note that you can still use velocity and cell_category in the +computation code, as if they are `ti.f32` and `ti.u8`. + +![image](../static/assets/bitpacked_fields_layout_example.png) + +### Quant arrays + +Bitpacked fields are actually laid in an array of structure (AOS) order. +However, there are also cases where a single quantized type is required to get +laid in an array. For example, you may want to store 8 x u4 values in a single +u32 type, to represent bin values of a histogram: + +![image](../static/assets/quant_array_layout_example.png) + +Quant array is exactly what you need. A `quant_array` is a SNode which +can reinterpret a primitive type into an array of a quantized type: + +```python +bin_value_type = ti.types.quant.int(bits=4, signed=False) + +# The quant array for 512 x 512 bin values +array = ti.root.dense(ti.ij, (512, 64)).quant_array(ti.j, 8, max_num_bits=32) +# Place the unsigned 4-bit bin value into the array +array.place(bin_value_type) +``` + +:::note +1. Only one field can be placed under a `quant_array`. +2. Only quantized integer types and quantized fixed-point types are supported as +the `dtype` of the field under a `quant_array`. +3. The size of the `dtype` of the field times the shape of the `quant_array` +must be less than or equal to the `max_num_bits` of the `quant_array`. +::: + +#### Bit vectorization + +For quant arrays of 1-bit quantized integer types ("boolean"), Taichi provides +an additional optimization - bit vectorization. 
It aims at vectorizing +operations on such quant arrays under struct fors: + +```python +u1 = ti.types.quant.int(1, False) +N = 512 +M = 32 +x = ti.field(dtype=u1) +y = ti.field(dtype=u1) +ti.root.dense(ti.i, N // M).quant_array(ti.i, M, max_num_bits=M).place(x) +ti.root.dense(ti.i, N // M).quant_array(ti.i, M, max_num_bits=M).place(y) + +@ti.kernel +def assign_vectorized(): + ti.loop_config(bit_vectorize=True) + for i, j in x: + y[i, j] = x[i, j] # 32 bits are handled at a time + +assign_vectorized() +``` + +## Advanced examples + +The following examples are picked from the +[QuanTaichi paper](https://yuanming.taichi.graphics/publication/2021-quantaichi/quantaichi.pdf), +so you can dig into details there. + +### [Game of Life](https://github.com/taichi-dev/quantaichi/tree/main/gol) + +![image](https://github.com/taichi-dev/quantaichi/raw/main/pics/teaser_gol.jpg) + +### [Eulerian Fluid](https://github.com/taichi-dev/quantaichi/tree/main/eulerian_fluid) + +![image](https://github.com/taichi-dev/quantaichi/raw/main/pics/smoke_result.png) + +### [MLS-MPM](https://github.com/taichi-dev/taichi_elements/blob/master/demo/demo_quantized_simulation_letters.py) + +![image](https://github.com/taichi-dev/quantaichi/raw/main/pics/mpm-235.jpg) diff --git a/docs/lang/articles/differentiable/differentiable_programming.md b/docs/lang/articles/differentiable/differentiable_programming.md index b95fa0b0bc3b0..d0a1b1443e203 100644 --- a/docs/lang/articles/differentiable/differentiable_programming.md +++ b/docs/lang/articles/differentiable/differentiable_programming.md @@ -448,3 +448,79 @@ Check out [the DiffTaichi paper](https://arxiv.org/pdf/1910.00935.pdf) and [video](https://www.youtube.com/watch?v=Z1xvAZve9aE) to learn more about Taichi differentiable programming. ::: + + +## Forward-Mode Autodiff + +There are two modes of automatic differentiation, forward and reverse mode. 
The forward mode provides a function to compute Jacobian-Vector Product (JVP), which can compute one column of the Jacobian matrix at a time. The reverse mode supports computing Vector-Jacobian Product (VJP), i.e., one row of the Jacobian matrix at a time. Therefore, for functions which have more inputs than outputs, reverse mode is more efficient. The `ti.ad.Tape` and `kernel.grad()` are built on the reverse mode. The forward mode is more efficient when handling functions that have more outputs than inputs. Taichi autodiff also supports forward mode. + +### Using `ti.ad.FwdMode` +The usage of `ti.ad.FwdMode` is very similar to `ti.ad.Tape`. Here we reuse the example for reverse mode above for an explanation. +1. Enable `needs_dual=True` option when declaring fields involved in the derivative chain. +2. Use context manager with `ti.ad.FwdMode(loss=y, param=x)`: to capture the kernel invocations which you want to automatically differentiate. The `loss` and `param` are the output and input of the function respectively. +3. Now dy/dx value at current x is available at function output `y.dual[None]`. +The following code snippet explains the steps above: + +```python +import taichi as ti +ti.init() + +x = ti.field(dtype=ti.f32, shape=(), needs_dual=True) +y = ti.field(dtype=ti.f32, shape=(), needs_dual=True) + + +@ti.kernel +def compute_y(): + y[None] = ti.sin(x[None]) + + +with ti.ad.FwdMode(loss=y, param=x): + compute_y() + +print('dy/dx =', y.dual[None], ' at x =', x[None]) +``` + +:::note +The `dual` here refers to the `dual number` in math. The reason for using the name is that forward-mode autodiff is equivalent to evaluating a function with dual numbers. +::: + +:::note +The `ti.ad.FwdMode` automatically clears the dual field of `loss`. +::: + +`ti.ad.FwdMode` supports multiple inputs and outputs. The param can be an N-D field and the loss can be an individual or a list of N-D fields. 
The argument `seed` is the 'vector' in Jacobian-vector product, which is used to control which parameter the derivative is computed with respect to. Here we show three cases with multiple inputs and outputs. With `seed=[1.0, 0.0]` or `seed=[0.0, 1.0]`, we can compute the derivatives solely with respect to `x_0` or `x_1`. + +```python +import taichi as ti +ti.init() +N_param = 2 +N_loss = 5 +x = ti.field(dtype=ti.f32, shape=N_param, needs_dual=True) +y = ti.field(dtype=ti.f32, shape=N_loss, needs_dual=True) + + +@ti.kernel +def compute_y(): + for i in range(N_loss): + for j in range(N_param): + y[i] += i * ti.sin(x[j]) + + +# Compute derivatives with respect to x_0 +with ti.ad.FwdMode(loss=y, param=x, seed=[1.0, 0.0]): + compute_y() +print('dy/dx_0 =', y.dual, ' at x_0 =', x[0]) + +# Compute derivatives with respect to x_1 +with ti.ad.FwdMode(loss=y, param=x, seed=[0.0, 1.0]): + compute_y() +print('dy/dx_1 =', y.dual, ' at x_1 =', x[1]) +``` + +:::note +The `seed` argument is required if the `param` is not a scalar field. +::: + +:::tip +Similar to reverse mode autodiff, Taichi provides an API `ti.root.lazy_dual()` that automatically places the dual fields following the layout of their primal fields. 
+::: diff --git a/docs/lang/articles/static/assets/bitpacked_fields_layout_example.png b/docs/lang/articles/static/assets/bitpacked_fields_layout_example.png new file mode 100644 index 0000000000000..4184cd60c2749 Binary files /dev/null and b/docs/lang/articles/static/assets/bitpacked_fields_layout_example.png differ diff --git a/docs/lang/articles/static/assets/floating-point_formats.png b/docs/lang/articles/static/assets/floating-point_formats.png new file mode 100644 index 0000000000000..9250254d0b9ee Binary files /dev/null and b/docs/lang/articles/static/assets/floating-point_formats.png differ diff --git a/docs/lang/articles/static/assets/quant_array_layout_example.png b/docs/lang/articles/static/assets/quant_array_layout_example.png new file mode 100644 index 0000000000000..05fbe7a765daa Binary files /dev/null and b/docs/lang/articles/static/assets/quant_array_layout_example.png differ diff --git a/docs/lang/articles/visualization/ggui.md b/docs/lang/articles/visualization/ggui.md index 8101c44845bc4..cdfa44e1c7993 100644 --- a/docs/lang/articles/visualization/ggui.md +++ b/docs/lang/articles/visualization/ggui.md @@ -94,6 +94,7 @@ Note that you need to call `point_light()` for every frame. Similar to the `canv ### 3D Geometries ```python +scene.lines(vertices, width, indices, color, per_vertex_color) scene.mesh(vertices, indices, normals, color, per_vertex_color) scene.particles(vertices, radius, color, per_vertex_color) ``` @@ -108,6 +109,220 @@ If a mesh has `num` triangles, the `indices` should be a 1D scalar field with a `normals` is an optional parameter for `scene.mesh()`. +:::example + +1. 
An example of drawing 3d-lines + +```python +import taichi as ti + +ti.init(arch=ti.cuda) + +N = 10 + +particles_pos = ti.Vector.field(3, dtype=ti.f32, shape = N) +points_pos = ti.Vector.field(3, dtype=ti.f32, shape = N) + +@ti.kernel +def init_points_pos(points : ti.template()): + for i in range(points.shape[0]): + points[i] = [i for j in ti.static(range(3))] + +init_points_pos(particles_pos) +init_points_pos(points_pos) + +window = ti.ui.Window("Test for Drawing 3d-lines", (768, 768)) +canvas = window.get_canvas() +scene = ti.ui.Scene() +camera = ti.ui.make_camera() +camera.position(5, 2, 2) + +while window.running: + camera.track_user_inputs(window, movement_speed=0.03, hold_key=ti.ui.RMB) + scene.set_camera(camera) + scene.ambient_light((0.8, 0.8, 0.8)) + scene.point_light(pos=(0.5, 1.5, 1.5), color=(1, 1, 1)) + + scene.particles(particles_pos, color = (0.68, 0.26, 0.19), radius = 0.1) + # Draw 3d-lines in the scene + scene.lines(points_pos, color = (0.28, 0.68, 0.99), width = 5.0) + canvas.scene(scene) + window.show() +``` + +### Advanced 3d Geometries + +```python +scene.lines(vertices, width, indices, color, per_vertex_color, vertex_offset, vertex_count, index_offset, index_count) + +scene.mesh(vertices, indices, normals, color, per_vertex_color, vertex_offset, vertex_count, index_offset, index_count, show_wireframe) + +scene.particles(vertices, radius, color, per_vertex_color, index_offset, index_count) + +scene.mesh_instance(vertices, indices, normals, color, per_vertex_color, vertex_offset, vertex_count, index_offset, index_count, show_wireframe) +``` + +The additional arguments `vertex_offset`, `vertex_count`, `index_offset` and `index_count` control the visible part of the particles and mesh. For the `mesh()` and `mesh_instance()` methods, set whether to show wireframe mode through setting `show_wireframe`. + +:::example + +1. 
Example of drawing a part of the mesh/particles + +```python +# For particles +# draw the 2-th to 7-th particles +scene.particles(center, radius, +index_offset = 1, +index_count = 6) + +# For mesh +# 1. with indices +scene.mesh(vertices, indices, +index_offset = user_defined_first_indices_index, +index_count = user_defined_index_count, +# vertex_offset is set to 0 by default, and it is not necessary +# to assign vertex_offset a value that otherwise you must. +vertex_offset = user_defined_vertex_offset) + +# usually used as below: +# draw the 11-th to 111-th mesh vertexes +scene.mesh(vertices, indices, +index_offset = 10, +index_count = 100) + +# 2. without indices (similar to the particles' example above) +scene.mesh(vertices, +vertex_offset = user_defined_first_vertex_index, +vertex_count = user_defined_vertex_count) +``` +2. An example of drawing part of lines +```python +import taichi as ti + +ti.init(arch=ti.cuda) + +N = 10 + +particles_pos = ti.Vector.field(3, dtype=ti.f32, shape = N) +points_pos = ti.Vector.field(3, dtype=ti.f32, shape = N) +points_indices = ti.Vector.field(1, dtype=ti.i32, shape = N) + +@ti.kernel +def init_points_pos(points : ti.template()): + for i in range(points.shape[0]): + points[i] = [i for j in range(3)] + # points[i] = [ti.sin(i * 1.0), i * 0.2, ti.cos(i * 1.0)] + +@ti.kernel +def init_points_indices(points_indices : ti.template()): + for i in range(N): + points_indices[i][0] = i // 2 + i % 2 + +init_points_pos(particles_pos) +init_points_pos(points_pos) +init_points_indices(points_indices) + +window = ti.ui.Window("Test for Drawing 3d-lines", (768, 768)) +canvas = window.get_canvas() +scene = ti.ui.Scene() +camera = ti.ui.make_camera() +camera.position(5, 2, 2) + +while window.running: + camera.track_user_inputs(window, movement_speed=0.03, hold_key=ti.ui.RMB) + scene.set_camera(camera) + scene.ambient_light((0.8, 0.8, 0.8)) + scene.point_light(pos=(0.5, 1.5, 1.5), color=(1, 1, 1)) + + scene.particles(particles_pos, color = (0.68, 
0.26, 0.19), radius = 0.1) + # Here you will get visible part from the 3rd point with (N - 4) points. + scene.lines(points_pos, color = (0.28, 0.68, 0.99), width = 5.0, vertex_count = N - 4, vertex_offset = 2) + # Using indices to indicate which vertex to use + # scene.lines(points_pos, color = (0.28, 0.68, 0.99), width = 5.0, indices = points_indices) + # Case 1, vertex_count will be changed to N - 2 when drawing. + # scene.lines(points_pos, color = (0.28, 0.68, 0.99), width = 5.0, vertex_count = N - 1, vertex_offset = 0) + # Case 2, vertex_count will be changed to N - 2 when drawing. + # scene.lines(points_pos, color = (0.28, 0.68, 0.99), width = 5.0, vertex_count = N, vertex_offset = 2) + canvas.scene(scene) + window.show() +``` + +3. Details of mesh instancing +```python +num_instance = 100 +m_transforms = ti.Matrix.field(4, 4, dtype = ti.f32, shape = num_instance) + + +# For example: An object is scaled by 2, rotated by rotMat, and translated by t = [1, 2, 3], then +# +# The ScaleMatrix is: +# 2, 0, 0, 0 +# 0, 2, 0, 0 +# 0, 0, 2, 0 +# 0, 0, 0, 1 +# +# The RotationMatrix is: +# https://en.wikipedia.org/wiki/Rotation_matrix#General_rotations +# +# The TranslationMatrix is: +# 1, 0, 0, 1 +# 0, 1, 0, 2 +# 0, 0, 1, 3 +# 0, 0, 0, 1 +# +# Let TransformMatrix = TranslationMatrix @ RotationMatrix @ ScaleMatrix, then the final TransformMatrix is: +# 2 * rotMat00, rotMat01, rotMat02, 1 +# rotMat10, 2 * rotMat11, rotMat12, 2 +# rotMat20, rotMat21, 2 * rotMat22, 3 +# 0, 0, 0, 1 +... + +# Draw mesh instances (from the 1st instance) +scene.mesh_instance(vertices, indices, transforms = m_transforms, instance_offset = 1) +``` +4. 
Example of setting wireframe mode +```python + +window = ti.ui.Window("Display Mesh", (1024, 1024), vsync=True) +canvas = window.get_canvas() +scene = ti.ui.Scene() +camera = ti.ui.make_camera() + +# slider_int usage +some_int_type_value = 0 +def show_options(): + global some_int_type_value + + window.GUI.begin("Display Panel", 0.05, 0.1, 0.2, 0.15) + display_mode = window.GUI.slider_int("Value Range", some_int_type_value, 0, 5) + window.GUI.end() + +while window.running: + + ... + # if to show wireframe + scene.mesh_instance(vertices, indices, instance_count = 100 , show_wireframe = True) + + canvas.scene(scene) + show_options() + window.show() +``` + + + +:::note + +If `indices` is not provided, consider using like this: +```python +scene.mesh(vertices, normals, color, per_vertex_color, vertex_offset, vertex_count, show_wireframe) +``` +If `indices` is provided, consider using like this: +```python +scene.mesh(vertices, indices, normals, color, per_vertex_color, vertex_offset, index_offset, index_count, show_wireframe) +``` + + + ::: ### Rendering the scene @@ -118,6 +333,55 @@ You can render a scene on a canvas. canvas.scene(scene) ``` +### Fetching Color/Depth information + +```python +img = window.get_image_buffer_as_numpy() +window.get_depth_buffer(scene_depth) +depth = window.get_depth_buffer_as_numpy() +``` + +After rendering the current scene, you can fetch the color and depth information of the current scene using `get_image_buffer_as_numpy()` and `get_depth_buffer_as_numpy()`, which copy the GPU data to a NumPy array (CPU). +`get_depth_buffer()` copies the GPU data to a Taichi field (depending on the `arch` you choose) or copies data from GPU to GPU. + +:::example + +1. 
Example of fetching color information +```python +window = ti.ui.Window("Test for getting image buffer from ggui", (768, 768), vsync=True) +video_manager = ti.tools.VideoManager("OutputDir") + +while window.running: + render_scene() + img = window.get_image_buffer_as_numpy() + video_manager.write_frame(img) + window.show() + +video_manager.make_video(gif=True, mp4=True) +``` + +2. An example of fetching the depth data +```python +window_shape = (720, 1080) +window = ti.ui.Window("Test for copy depth data", window_shape) +canvas = window.get_canvas() +scene = ti.ui.Scene() +camera = ti.ui.make_camera() + +# Get the shape of the window +w, h = window.get_window_shape() +# The field/ndarray stores the depth information, and must be of the ti.f32 data type and have a 2d shape. +# or, in other words, the shape must equal the window's shape +scene_depth = ti.ndarray(ti.f32, shape = (w, h)) +# scene_depth = ti.field(ti.f32, shape = (w, h)) + +while window.running: + render() + canvas.scene(scene) + window.get_depth_buffer(scene_depth) + window.show() +``` + ## GUI components The design of GGUI's GUI components follows the [Dear ImGui](https://github.com/ocornut/imgui) APIs. 
diff --git a/python/taichi/ad/_ad.py b/python/taichi/ad/_ad.py index b40aa587e4745..3b5e918767094 100644 --- a/python/taichi/ad/_ad.py +++ b/python/taichi/ad/_ad.py @@ -274,6 +274,11 @@ def shape_flatten(shape): else: assert parameters_shape_flatten == len(self.seed) + # Clear gradients + if self.clear_gradients: + # TODO: the clear gradients should be controlled to clear adjoint/dual/adjoint_visited respectively + clear_all_gradients() + # Set seed for each variable if len(self.seed) == 1: if len(self.param.shape) == 0: @@ -286,11 +291,6 @@ def shape_flatten(shape): for idx, s in enumerate(self.seed): self.param.dual[idx] = 1.0 * s - # Clear gradients - if self.clear_gradients: - for ls in self.loss: - ls.dual.fill(0) - # Attach the context manager to the runtime self.runtime.fwd_mode_manager = self diff --git a/python/taichi/lang/field.py b/python/taichi/lang/field.py index e36d88f43aafd..cfea4d10069f4 100644 --- a/python/taichi/lang/field.py +++ b/python/taichi/lang/field.py @@ -352,8 +352,9 @@ def __getitem__(self, key): # Check for potential slicing behaviour # for instance: x[0, :] padded_key = self._pad_key(key) + import numpy as np # pylint: disable=C0415 for key in padded_key: - if not isinstance(key, int): + if not isinstance(key, (int, np.integer)): raise TypeError( f"Detected illegal element of type: {type(key)}. " f"Please be aware that slicing a ti.field is not supported so far." 
diff --git a/python/taichi/lang/matrix.py b/python/taichi/lang/matrix.py index 629751e3412fb..ce1bdb60c443c 100644 --- a/python/taichi/lang/matrix.py +++ b/python/taichi/lang/matrix.py @@ -125,7 +125,7 @@ def _linearize_entry_id(self, *args): args = args + (0, ) # TODO(#1004): See if it's possible to support indexing at runtime for i, a in enumerate(args): - if not isinstance(a, int): + if not isinstance(a, (int, np.integer)): raise TaichiSyntaxError( f'The {i}-th index of a Matrix/Vector must be a compile-time constant ' f'integer, got {type(a)}.\n' diff --git a/python/taichi/ui/scene.py b/python/taichi/ui/scene.py index 074e455c4f949..46f95deb413a4 100644 --- a/python/taichi/ui/scene.py +++ b/python/taichi/ui/scene.py @@ -171,7 +171,7 @@ def mesh(self, vertex_count: int = None, index_offset: int = 0, index_count: int = None, - show_wareframe: bool = False): + show_wireframe: bool = False): """Declare a mesh inside the scene. if you indicate the index_offset and index_count, the normals will also @@ -206,7 +206,7 @@ def mesh(self, index_count (int, optional): only available when `indices` is provided, which is the the number of vertices to draw. - show_wareframe (bool, optional): + show_wireframe (bool, optional): turn on/off WareFrame mode. """ vbo = get_vbo_field(vertices) @@ -229,7 +229,7 @@ def mesh(self, self.scene.mesh(vbo_info, has_per_vertex_color, indices_info, color, two_sided, index_count, index_offset, vertex_count, - vertex_offset, show_wareframe) + vertex_offset, show_wireframe) def mesh_instance(self, vertices, @@ -245,7 +245,7 @@ def mesh_instance(self, vertex_count: int = None, index_offset: int = 0, index_count: int = None, - show_wareframe: bool = False): + show_wireframe: bool = False): """Declare mesh instances inside the scene. 
If transforms is given, then according to the shape of transforms, it will @@ -290,7 +290,7 @@ def mesh_instance(self, index_count (int, optional): only available when `indices` is provided, which is the the number of indices to draw. - show_wareframe (bool, optional): + show_wireframe (bool, optional): turn on/off WareFrame mode. """ vbo = get_vbo_field(vertices) @@ -319,7 +319,7 @@ def mesh_instance(self, color, two_sided, transform_info, instance_count, instance_offset, index_count, index_offset, vertex_count, vertex_offset, - show_wareframe) + show_wireframe) def particles(self, centers, diff --git a/python/taichi/ui/window.py b/python/taichi/ui/window.py index c605ead7824f2..2c84b0b9007e7 100644 --- a/python/taichi/ui/window.py +++ b/python/taichi/ui/window.py @@ -187,13 +187,13 @@ def get_depth_buffer_as_numpy(self): arr_vulkan_layout_to_arr_normal_layout(tmp_depth, depth_numpy_arr) return depth_numpy_arr - def get_image_buffer(self): + def get_image_buffer_as_numpy(self): """Get the window content to numpy array. Returns: 3d numpy array: [width, height, channels] with (0.0~1.0) float-format color. """ - return self.window.get_image_buffer() + return self.window.get_image_buffer_as_numpy() def destroy(self): """Destroy this window. The window will be unavailable then. 
diff --git a/taichi/python/export_ggui.cpp b/taichi/python/export_ggui.cpp index 7c5be1a917b24..bb3feec085ba7 100644 --- a/taichi/python/export_ggui.cpp +++ b/taichi/python/export_ggui.cpp @@ -176,7 +176,7 @@ struct PyScene { float draw_first_index, float draw_vertex_count, float draw_first_vertex, - bool show_wareframe) { + bool show_wireframe) { RenderableInfo renderable_info; renderable_info.vbo = vbo; renderable_info.has_per_vertex_color = has_per_vertex_color; @@ -186,7 +186,7 @@ struct PyScene { renderable_info.draw_first_index = (int)draw_first_index; renderable_info.draw_vertex_count = (int)draw_vertex_count; renderable_info.draw_first_vertex = (int)draw_first_vertex; - renderable_info.display_mode = show_wareframe + renderable_info.display_mode = show_wireframe ? taichi::lang::PolygonMode::Line : taichi::lang::PolygonMode::Fill; @@ -231,7 +231,7 @@ struct PyScene { float draw_first_index, float draw_vertex_count, float draw_first_vertex, - bool show_wareframe) { + bool show_wireframe) { RenderableInfo renderable_info; renderable_info.vbo = vbo; renderable_info.has_per_vertex_color = has_per_vertex_color; @@ -241,7 +241,7 @@ struct PyScene { renderable_info.draw_first_index = (int)draw_first_index; renderable_info.draw_vertex_count = (int)draw_vertex_count; renderable_info.draw_first_vertex = (int)draw_first_vertex; - renderable_info.display_mode = show_wareframe + renderable_info.display_mode = show_wireframe ? 
taichi::lang::PolygonMode::Line : taichi::lang::PolygonMode::Fill; @@ -484,7 +484,7 @@ void export_ggui(py::module &m) { .def("write_image", &PyWindow::write_image) .def("copy_depth_buffer_to_ndarray", &PyWindow::copy_depth_buffer_to_ndarray) - .def("get_image_buffer", &PyWindow::get_image_buffer) + .def("get_image_buffer_as_numpy", &PyWindow::get_image_buffer) .def("is_pressed", &PyWindow::is_pressed) .def("get_cursor_pos", &PyWindow::py_get_cursor_pos) .def("is_running", &PyWindow::is_running) diff --git a/taichi/transforms/auto_diff.cpp b/taichi/transforms/auto_diff.cpp index af42eb689298b..f853bf2a3a7e9 100644 --- a/taichi/transforms/auto_diff.cpp +++ b/taichi/transforms/auto_diff.cpp @@ -1367,6 +1367,23 @@ class BackupSSA : public BasicStmtVisitor { if (op->is()) { // Just create another AdStackLoadTopStmt stmt->set_operand(i, stmt->insert_before_me(op->clone())); + } else if (op->is()) { + // Backup AdStackAllocaStmt because it should not be local stored and + // local loaded + auto stack_alloca = op->as(); + if (backup_alloca.find(op) == backup_alloca.end()) { + auto backup_stack_alloca = Stmt::make( + stack_alloca->dt, stack_alloca->max_size); + auto backup_stack_alloca_ptr = backup_stack_alloca.get(); + independent_block->insert(std::move(backup_stack_alloca), 0); + backup_alloca[op] = backup_stack_alloca_ptr; + // Replace usages of all blocks i.e., the entry point for the + // replace is the top level block + irpass::replace_all_usages_with(leaf_to_root.back(), op, + backup_stack_alloca_ptr); + // Erase the outdated AdStackAllocaStmt + op->parent->erase(op); + } } else { auto alloca = load(op); TI_ASSERT(op->width() == 1); diff --git a/tests/python/test_ad_basics_fwd.py b/tests/python/test_ad_basics_fwd.py index 7fe7932a71f67..6eed8d8a9a7a0 100644 --- a/tests/python/test_ad_basics_fwd.py +++ b/tests/python/test_ad_basics_fwd.py @@ -103,3 +103,23 @@ def func(): with ti.ad.FwdMode(loss=d, param=c): func() + + +@test_utils.test() +def 
test_clear_all_dual_field(): + x = ti.field(float, shape=(), needs_dual=True) + y = ti.field(float, shape=(), needs_dual=True) + loss = ti.field(float, shape=(), needs_dual=True) + + x[None] = 2.0 + y[None] = 3.0 + + @ti.kernel + def clear_dual_test(): + y[None] = x[None]**2 + loss[None] += y[None] + + for _ in range(5): + with ti.ad.FwdMode(loss=loss, param=x): + clear_dual_test() + assert y.dual[None] == 4.0 diff --git a/tests/python/test_ad_math_func.py b/tests/python/test_ad_math_func.py new file mode 100644 index 0000000000000..34192d5d9beb9 --- /dev/null +++ b/tests/python/test_ad_math_func.py @@ -0,0 +1,20 @@ +import taichi as ti +from tests import test_utils + + +@test_utils.test(require=ti.extension.adstack, dynamic_index=False) +def test_polar_decompose_2D(): + # `polar_decompose3d` in current Taichi version (v1.1) does not support autodiff, + # because it mixed usage of for-loops and statements without looping. + dim = 2 + F_1 = ti.Matrix.field(dim, dim, dtype=ti.f32, shape=(), needs_grad=True) + F = ti.Matrix.field(dim, dim, dtype=ti.f32, shape=(), needs_grad=True) + loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True) + + @ti.kernel + def polar_decompose_2D(): + r, s = ti.polar_decompose(F[None]) + F_1[None] += r + + with ti.ad.Tape(loss=loss): + polar_decompose_2D() diff --git a/tests/python/test_field.py b/tests/python/test_field.py index 97f3db484e127..b7af5e70c21b4 100644 --- a/tests/python/test_field.py +++ b/tests/python/test_field.py @@ -2,6 +2,7 @@ To test our new `ti.field` API is functional (#1500) ''' +import numpy as np import pytest from taichi.lang import impl from taichi.lang.misc import get_host_arch_list @@ -282,6 +283,27 @@ def test_invalid_slicing(): val[0, :] +@test_utils.test() +def test_indexing_with_np_int(): + val = ti.field(ti.i32, shape=(2)) + idx = np.int32(0) + val[idx] + + +@test_utils.test() +def test_indexing_vec_field_with_np_int(): + val = ti.Vector.field(2, ti.i32, shape=(2)) + idx = np.int32(0) + val[idx][idx] + +
+@test_utils.test() +def test_indexing_mat_field_with_np_int(): + val = ti.Matrix.field(2, 2, ti.i32, shape=(2)) + idx = np.int32(0) + val[idx][idx, idx] + + @test_utils.test(exclude=[ti.cc], debug=True) def test_field_fill(): x = ti.field(int, shape=(3, 3)) diff --git a/version.txt b/version.txt index 80e0d762db8b2..795460fcec881 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v1.0.5 +v1.1.0