Skip to content

Commit

Permalink
Utilize ctrl_deps for operator dependencies in simulation
Browse files Browse the repository at this point in the history
Previously, the data_deps field was utilized to encode operator
dependencies in simulations. However, data_deps should actually
be reserved for encoding data dependencies, not for simulating
operator dependencies. Therefore, this commit updates the code
to ensure that pytorch2chakra_converter.py employs ctrl_deps for
this purpose.
  • Loading branch information
TaekyungHeo committed Feb 22, 2024
1 parent e053939 commit 8977da7
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 58 deletions.
98 changes: 48 additions & 50 deletions et_converter/pytorch2chakra_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class PyTorch2ChakraConverter:
parent_to_children_map (Dict[int, List[int]]): Map of Chakra parent node
IDs to their child node
IDs. Used to simulate
execution based on data
execution based on control
dependencies.
"""

Expand Down Expand Up @@ -187,7 +187,7 @@ def convert(self) -> None:

root_nodes = [node for node in self.chakra_nodes.values() if self.is_root_node(node)]
for root_node in root_nodes:
self.convert_ctrl_dep_to_data_dep(root_node)
self.convert_parent_child_relationship_to_ctrl_dep(root_node)

self.remove_dangling_nodes()

Expand Down Expand Up @@ -488,8 +488,6 @@ def convert_to_chakra_node(self, pytorch_node: PyTorchNode) -> ChakraNode:
chakra_node.id = pytorch_node.id
chakra_node.name = pytorch_node.name
chakra_node.type = self.get_chakra_node_type_from_pytorch_node(pytorch_node)
if pytorch_node.parent in self.chakra_nodes:
chakra_node.ctrl_deps.append(pytorch_node.parent)
chakra_node.duration_micros = pytorch_node.exclusive_dur
chakra_node.inputs.values = str(pytorch_node.inputs)
chakra_node.inputs.shapes = str(pytorch_node.input_shapes)
Expand All @@ -499,14 +497,15 @@ def convert_to_chakra_node(self, pytorch_node: PyTorchNode) -> ChakraNode:
chakra_node.outputs.types = str(pytorch_node.output_types)
chakra_node.attr.extend([
ChakraAttr(name="rf_id", int64_val=pytorch_node.rf_id),
ChakraAttr(name="parent", int64_val=pytorch_node.parent),
ChakraAttr(name="fw_parent", int64_val=pytorch_node.fw_parent),
ChakraAttr(name="seq_id", int64_val=pytorch_node.seq_id),
ChakraAttr(name="scope", int64_val=pytorch_node.scope),
ChakraAttr(name="tid", int64_val=pytorch_node.tid),
ChakraAttr(name="fw_tid", int64_val=pytorch_node.fw_tid),
ChakraAttr(name="op_schema", string_val=pytorch_node.op_schema),
ChakraAttr(name="is_cpu_op", int32_val=not pytorch_node.is_gpu_op()),
ChakraAttr(name="ts", int64_val=pytorch_node.ts)
ChakraAttr(name="ts", int64_val=pytorch_node.ts),
])
return chakra_node

Expand Down Expand Up @@ -580,33 +579,32 @@ def is_root_node(self, node):
"[pytorch|profiler|execution_trace|thread]"]:
return True

def convert_ctrl_dep_to_data_dep(self, chakra_node: ChakraNode) -> None:
"""
Traverses nodes based on control dependencies (parent nodes) and encodes
data dependencies appropriately. This method is crucial for converting the
dependency structure from PyTorch execution traces to Chakra execution
traces. In PyTorch traces, control dependencies are represented by a
parent field in each node, denoting the parent node ID. This structure
indicates which functions (operators) are called by a particular operator.
In contrast, Chakra execution traces, while retaining control dependencies
for compatibility, primarily rely on data dependencies to represent
relationships between nodes. Data dependencies in Chakra are more broadly
defined compared to those in PyTorch, where they are implicitly encoded in
tensor input-output relationships. In Chakra, data dependencies are explicit
and represent a general dependency between nodes.
To convert PyTorch's control dependencies to Chakra's data dependencies, a
Depth-First Search (DFS) is performed. The DFS traversal starts from a given
Chakra node, traversing through its children (based on control
dependencies). During traversal, data dependencies are encoded by linking
nodes that have been visited in sequence. These dependencies form a chain,
mirroring the function call order from the PyTorch trace.
def convert_parent_child_relationship_to_ctrl_dep(self, chakra_node: ChakraNode) -> None:
"""
Traverses nodes based on parent-child relationships in PyTorch execution
traces and encodes control dependencies appropriately. This method is
crucial for converting the dependency structure from PyTorch execution
traces to Chakra execution traces. In PyTorch traces, parent-child relationships
are represented by a parent field in each node, denoting the parent node ID.
This structure indicates which functions (operators) are called by a particular
operator.
In contrast, Chakra execution traces, while retaining parent-child
relationships for compatibility, primarily rely on control dependencies
to represent relationships between nodes. In Chakra, control dependencies
are explicit and represent a general dependency between nodes.
To convert PyTorch's parent-child relationships to Chakra's control dependencies,
a Depth-First Search (DFS) is performed. The DFS traversal starts from a given
Chakra node, traversing through its children (based on parent-child relationships).
During traversal, control dependencies are encoded by linking nodes that
have been visited in sequence. These dependencies form a chain, mirroring
the function call order from the PyTorch trace.
Special attention is given to the types of nodes involved. CPU and label
nodes (non-GPU) in PyTorch can only depend on other CPU or label nodes.
However, GPU nodes can depend on any type of node. Thus, while traversing,
if a GPU node is encountered, it can establish a data dependency with the
if a GPU node is encountered, it can establish a control dependency with the
last visited node of any type. For CPU and label nodes, the dependency is
only established with the last visited non-GPU node. This distinction
ensures that the converted dependencies accurately reflect the execution
Expand Down Expand Up @@ -650,38 +648,38 @@ def convert_ctrl_dep_to_data_dep(self, chakra_node: ChakraNode) -> None:

if node_op_type == PyTorchNodeType.GPU_OP:
if last_visited_any:
if last_visited_any.id not in current_node.data_deps:
current_node.data_deps.append(last_visited_any.id)
if last_visited_any.id not in current_node.ctrl_deps:
current_node.ctrl_deps.append(last_visited_any.id)
self.logger.debug(
f"GPU Node ID {current_node.id} now has a data "
f"GPU Node ID {current_node.id} now has a control "
f"dependency on Node ID {last_visited_any.id}"
)

stream_id = pytorch_node.stream
if stream_id in last_gpu_in_stream:
if last_gpu_in_stream[stream_id].id not in current_node.data_deps:
current_node.data_deps.append(last_gpu_in_stream[stream_id].id)
if last_gpu_in_stream[stream_id].id not in current_node.ctrl_deps:
current_node.ctrl_deps.append(last_gpu_in_stream[stream_id].id)
self.logger.debug(
f"GPU Node ID {current_node.id} in stream {stream_id} now has a data "
f"GPU Node ID {current_node.id} in stream {stream_id} now has a control "
f"dependency on GPU Node ID {last_gpu_in_stream[stream_id].id} in the same stream."
)
last_gpu_in_stream[stream_id] = current_node
last_visited_any = current_node
else:
if pytorch_node.inter_thread_dep:
for id in self.id_assigner.get_assigned_ids(pytorch_node.inter_thread_dep):
if id not in current_node.data_deps:
current_node.data_deps.append(id)
if id not in current_node.ctrl_deps:
current_node.ctrl_deps.append(id)
self.logger.debug(
f"CPU Node ID {current_node.id} now has an inter-thread data "
f"CPU Node ID {current_node.id} now has an inter-thread control "
f"dependency on Node ID {id}"
)

if last_visited_non_gpu:
if last_visited_non_gpu.id not in current_node.data_deps:
current_node.data_deps.append(last_visited_non_gpu.id)
if last_visited_non_gpu.id not in current_node.ctrl_deps:
current_node.ctrl_deps.append(last_visited_non_gpu.id)
self.logger.debug(
f"CPU Node ID {current_node.id} now has a data "
f"CPU Node ID {current_node.id} now has a control "
f"dependency on non-GPU Node ID {last_visited_non_gpu.id}"
)
last_visited_non_gpu = current_node
Expand All @@ -701,11 +699,11 @@ def remove_dangling_nodes(self) -> None:
"""
parent_ids = set()
for node in self.chakra_nodes.values():
parent_ids.update(node.data_deps)
parent_ids.update(node.ctrl_deps)

dangling_nodes = []
for node_id, node in list(self.chakra_nodes.items()):
if node_id not in parent_ids and not node.data_deps:
if node_id not in parent_ids and not node.ctrl_deps:
dangling_nodes.append(node)
del self.chakra_nodes[node_id]
del self.pytorch_nodes[node_id]
Expand All @@ -721,7 +719,7 @@ def update_parent_to_children_map(self) -> None:
This map is used to efficiently simulate node execution based on control dependencies.
"""
for node_id, node in self.chakra_nodes.items():
for dep_id in node.data_deps:
for dep_id in node.ctrl_deps:
# Ensure the dependency is registered as a parent of the current node
if dep_id not in self.parent_to_children_map:
self.parent_to_children_map[dep_id] = []
Expand Down Expand Up @@ -765,7 +763,7 @@ def dfs(node_id: int, path: List[int]) -> bool:
visited.add(node_id)
stack.add(node_id)
path.append(node_id)
for child_id in self.chakra_nodes[node_id].data_deps:
for child_id in self.chakra_nodes[node_id].ctrl_deps:
if dfs(child_id, path.copy()):
return True
stack.remove(node_id)
Expand Down Expand Up @@ -841,26 +839,26 @@ def close_chakra_execution_trace(self) -> None:

def simulate_execution(self) -> None:
"""
Simulates the execution of Chakra nodes based on data dependencies.
Simulates the execution of Chakra nodes based on control dependencies.
This method considers both CPU and GPU nodes. Nodes are issued for
execution based on the readiness determined by dependency resolution.
A simplistic global clock is used to model the execution time.
"""
self.logger.info("Simulating execution of Chakra nodes based on data "
self.logger.info("Simulating execution of Chakra nodes based on control "
"dependencies.")

# Initialize queues for ready CPU and GPU nodes
ready_cpu_nodes = [
(node_id, self.chakra_nodes[node_id])
for node_id in self.chakra_nodes
if not self.chakra_nodes[node_id].data_deps and
if not self.chakra_nodes[node_id].ctrl_deps and
not self.pytorch_nodes[node_id].is_gpu_op()
]
ready_gpu_nodes = [
(node_id, self.chakra_nodes[node_id])
for node_id in self.chakra_nodes
if not self.chakra_nodes[node_id].data_deps and
if not self.chakra_nodes[node_id].ctrl_deps and
self.pytorch_nodes[node_id].is_gpu_op()
]
ready_cpu_nodes.sort(key=lambda x: x[1].id)
Expand Down Expand Up @@ -910,8 +908,8 @@ def simulate_execution(self) -> None:
children_ids = self.parent_to_children_map.get(node_id, [])
for child_id in children_ids:
child_node = self.chakra_nodes[child_id]
child_node.data_deps.remove(node_id)
if not child_node.data_deps:
child_node.ctrl_deps.remove(node_id)
if not child_node.ctrl_deps:
if not self.pytorch_nodes[child_id].is_gpu_op():
ready_cpu_nodes.append((child_id, child_node))
else:
Expand Down
16 changes: 8 additions & 8 deletions et_feeder/et_feeder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@ void ETFeeder::freeChildrenNodes(uint64_t node_id) {
shared_ptr<ETFeederNode> node = dep_graph_[node_id];
for (auto child : node->getChildren()) {
auto child_chakra = child->getChakraNode();
for (auto it = child_chakra->mutable_data_deps()->begin();
it != child_chakra->mutable_data_deps()->end();
for (auto it = child_chakra->mutable_ctrl_deps()->begin();
it != child_chakra->mutable_ctrl_deps()->end();
++it) {
if (*it == node_id) {
child_chakra->mutable_data_deps()->erase(it);
child_chakra->mutable_ctrl_deps()->erase(it);
break;
}
}
if (child_chakra->data_deps().size() == 0) {
if (child_chakra->ctrl_deps().size() == 0) {
dep_free_node_id_set_.emplace(child_chakra->id());
dep_free_node_queue_.emplace(child);
}
Expand All @@ -101,13 +101,13 @@ shared_ptr<ETFeederNode> ETFeeder::readNode() {
shared_ptr<ETFeederNode> node = make_shared<ETFeederNode>(pkt_msg);

bool dep_unresolved = false;
for (int i = 0; i < pkt_msg->data_deps_size(); ++i) {
auto parent_node = dep_graph_.find(pkt_msg->data_deps(i));
for (int i = 0; i < pkt_msg->ctrl_deps_size(); ++i) {
auto parent_node = dep_graph_.find(pkt_msg->ctrl_deps(i));
if (parent_node != dep_graph_.end()) {
parent_node->second->addChild(node);
} else {
dep_unresolved = true;
node->addDepUnresolvedParentID(pkt_msg->data_deps(i));
node->addDepUnresolvedParentID(pkt_msg->ctrl_deps(i));
}
}

Expand Down Expand Up @@ -166,7 +166,7 @@ void ETFeeder::readNextWindow() {
uint64_t node_id = node_id_node.first;
shared_ptr<ETFeederNode> node = node_id_node.second;
if ((dep_free_node_id_set_.count(node_id) == 0) &&
(node->getChakraNode()->data_deps().size() == 0)) {
(node->getChakraNode()->ctrl_deps().size() == 0)) {
dep_free_node_id_set_.emplace(node_id);
dep_free_node_queue_.emplace(node);
}
Expand Down

0 comments on commit 8977da7

Please sign in to comment.