diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp index b7589e10967..913eca885fe 100644 --- a/tt_metal/impl/dispatch/command_queue.cpp +++ b/tt_metal/impl/dispatch/command_queue.cpp @@ -1162,25 +1162,24 @@ EnqueueTerminateCommand::EnqueueTerminateCommand( command_queue_id(command_queue_id), device(device), manager(manager) {} void EnqueueTerminateCommand::process() { - uint32_t cmd_sequence_sizeB = - CQ_PREFETCH_CMD_BARE_MIN_SIZE + // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_TERMINATE - CQ_PREFETCH_CMD_BARE_MIN_SIZE; // CQ_PREFETCH_CMD_TERMINATE + // CQ_PREFETCH_CMD_RELAY_INLINE + CQ_DISPATCH_CMD_TERMINATE + // CQ_PREFETCH_CMD_TERMINATE + uint32_t cmd_sequence_sizeB = CQ_PREFETCH_CMD_BARE_MIN_SIZE; + // dispatch and prefetch terminate commands each needs to be a separate fetch queue entry void *cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->command_queue_id); - - HugepageDeviceCommand command_sequence(cmd_region, cmd_sequence_sizeB); - command_sequence.add_dispatch_terminate(); - command_sequence.add_prefetch_terminate(); - + HugepageDeviceCommand dispatch_command_sequence(cmd_region, cmd_sequence_sizeB); + dispatch_command_sequence.add_dispatch_terminate(); this->manager.issue_queue_push_back(cmd_sequence_sizeB, this->command_queue_id); + this->manager.fetch_queue_reserve_back(this->command_queue_id); + this->manager.fetch_queue_write(cmd_sequence_sizeB, this->command_queue_id); - // Command sequence has dispatch and prefetch terminate commands but each needs to be a separate fetch queue entry - TT_ASSERT(cmd_sequence_sizeB % CQ_PREFETCH_CMD_BARE_MIN_SIZE == 0); - uint32_t num_terminate_cmds = cmd_sequence_sizeB / CQ_PREFETCH_CMD_BARE_MIN_SIZE; - for (int terminate_cmd_idx = 0; terminate_cmd_idx < num_terminate_cmds; terminate_cmd_idx++) { - this->manager.fetch_queue_reserve_back(this->command_queue_id); - this->manager.fetch_queue_write(CQ_PREFETCH_CMD_BARE_MIN_SIZE, this->command_queue_id); - } + cmd_region = this->manager.issue_queue_reserve(cmd_sequence_sizeB, this->command_queue_id); + HugepageDeviceCommand prefetch_command_sequence(cmd_region, cmd_sequence_sizeB); + prefetch_command_sequence.add_prefetch_terminate(); + this->manager.issue_queue_push_back(cmd_sequence_sizeB, this->command_queue_id); + this->manager.fetch_queue_reserve_back(this->command_queue_id); + this->manager.fetch_queue_write(cmd_sequence_sizeB, this->command_queue_id); } diff --git a/tt_metal/impl/dispatch/command_queue_interface.hpp b/tt_metal/impl/dispatch/command_queue_interface.hpp index a868adbf4c1..411c01eda21 100644 --- a/tt_metal/impl/dispatch/command_queue_interface.hpp +++ b/tt_metal/impl/dispatch/command_queue_interface.hpp @@ -329,9 +329,9 @@ class SystemMemoryManager { } this->channel_offset = MAX_HUGEPAGE_SIZE * channel; + CoreType core_type = dispatch_core_manager::get(num_hw_cqs).get_dispatch_core_type(device_id); for (uint8_t cq_id = 0; cq_id < num_hw_cqs; cq_id++) { tt_cxy_pair prefetcher_core = dispatch_core_manager::get(num_hw_cqs).prefetcher_core(device_id, channel, cq_id); - CoreType core_type = dispatch_core_manager::get(num_hw_cqs).get_dispatch_core_type(device_id); tt_cxy_pair prefetcher_physical_core = tt_cxy_pair(prefetcher_core.chip, tt::get_physical_core_coordinate(prefetcher_core, core_type)); this->prefetcher_cores[cq_id] = prefetcher_physical_core; @@ -341,6 +341,10 @@ class SystemMemoryManager { this->completion_byte_addrs[cq_id] = completion_tlb_offset + CQ_COMPLETION_READ_PTR % completion_tlb_size; this->cq_interfaces.push_back(SystemMemoryCQInterface(channel, cq_id, this->cq_size)); + // Prefetch queue acts as the sync mechanism to ensure that issue queue has space to write, so issue queue must be as large as the max amount of space the prefetch queue can specify + // Plus 1 to handle wrapping + // Plus 1 to allow us to start writing to issue queue before we reserve space in the prefetch queue + TT_FATAL(dispatch_constants::get(core_type).max_prefetch_command_size() * (dispatch_constants::get(core_type).prefetch_q_entries() + 2) <= this->get_issue_queue_size(cq_id)); this->cq_to_event.push_back(0); this->cq_to_last_completed_event.push_back(0); this->prefetch_q_dev_ptrs[cq_id] = dispatch_constants::PREFETCH_Q_BASE;