Skip to content

Commit

Permalink
#7511: adjust fidelity, subblock index order, and test for matmul
Browse files Browse the repository at this point in the history
  • Loading branch information
bbradelTT committed May 12, 2024
1 parent 780b0f3 commit 06a9d66
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 5 deletions.
4 changes: 2 additions & 2 deletions tests/ttnn/unit_tests/test_multi_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,15 +449,15 @@ def test_sharded_matmul(t3k_device_mesh):

def test_4b_tensor(device_mesh):
tensor = ttnn.from_torch(
torch.randn(1, 1, 64, 64),
torch.randn(1, 1, 32, 32),
dtype=ttnn.bfloat4_b,
layout=ttnn.TILE_LAYOUT,
device=device_mesh,
mesh_mapper=ReplicateTensorToMesh(device_mesh),
)
tensor = ttnn.to_device(tensor, device_mesh)
x = ttnn.from_torch(
torch.randn(1, 1, 64, 64),
torch.randn(1, 1, 32, 32),
dtype=ttnn.bfloat16,
layout=ttnn.TILE_LAYOUT,
device=device_mesh,
Expand Down
4 changes: 2 additions & 2 deletions tt_eager/tt_dnn/op_library/bmm/bmm_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,8 +513,8 @@ tt::operations::primary::MatmulProgramConfig get_matmul_program_config(const Ten
tuple<uint32_t, uint32_t> get_subblock_sizes(uint32_t m_tiles_per_core, uint32_t n_tiles_per_core, bool fp32_dest_acc_en) {
uint32_t out_subblock_h, out_subblock_w;
for (auto &subblock_hw : SUBBLOCK_HW_CHOICES) {
out_subblock_h = std::get<0>(subblock_hw);
out_subblock_w = std::get<1>(subblock_hw);
out_subblock_w = std::get<0>(subblock_hw);
out_subblock_h = std::get<1>(subblock_hw);
if ((out_subblock_h * out_subblock_w) <= 4 || !fp32_dest_acc_en) {
if (m_tiles_per_core % out_subblock_h == 0 && n_tiles_per_core % out_subblock_w == 0) {
return {out_subblock_h, out_subblock_w};
Expand Down
4 changes: 3 additions & 1 deletion tt_eager/tt_dnn/op_library/bmm/bmm_op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,8 @@ MatmulProgramConfig create_matmul_program_config(const Tensor& input_tensor_a, c
namespace bmm_op_utils {
using namespace tt::tt_metal;

// Keep this table symmetric: every {a, b} entry must be accompanied by its
// mirrored {b, a} entry. Different code paths read the tuple elements in
// different index order (0,1 vs 1,0) to meet test PCC requirements.
constexpr std::array<tuple<uint32_t, uint32_t>, 20> SUBBLOCK_HW_CHOICES = {{
{4, 2}, {2, 4}, {8, 1}, {1, 8},
{7, 1}, {1, 7},
Expand Down Expand Up @@ -401,7 +403,7 @@ inline Tensor matmul(
const auto& input_tensor_a = input_tensors.at(0);
const auto& input_tensor_b = input_tensors.at(1);
auto arch = input_tensor_a.device()->arch();
const auto increase_fidelity = is_program_config_default(program_config) || user_core_coord.has_value();
const auto increase_fidelity = is_program_config_default(program_config) && !user_core_coord.has_value();
auto math_fidelity = increase_fidelity ? MathFidelity::HiFi2 : MathFidelity::LoFi;
auto kernel_config_val = init_device_compute_kernel_config(arch, compute_kernel_config, math_fidelity);
bool broadcast_batch = get_broadcast_batch(input_tensor_a, input_tensor_b, program_config);
Expand Down

0 comments on commit 06a9d66

Please sign in to comment.