@@ -57,13 +57,27 @@ class XYRef:
5757 """
5858 Holder class that maintains a pointer/reference to X and y. The goal of this is to provide
5959 a holder to the object references of Ray. This is used for passing outputs from a transform/fit
60- to the next stage of the pipeline. Since the references can be potentially in flight (or being
60+ to the next stage of the pipeline. Since the object references can be potentially in flight (or being
6161 computed), these holders are essential to the pipeline constructs.
6262
6363 It also holds the state of the node itself, with the previous state of the node before a transform
6464 operation is applied being held along with the next state. It also holds the previous
6565 XYRef instances. In essence, this holder class is a bunch of pointers, but it is enough to reconstruct
6666 the entire pipeline through appropriate traversals.
67+
68+ NOTE: Default constructor takes pointer to X and y. The more advanced constructs are pointer
69+ holders for the pipeline during its execution and are not meant to be used outside by developers.
70+
71+ Examples
72+ --------
73+ .. code-block:: python
74+
75+ x = np.array([1.0, 2.0, 4.0, 5.0])
76+ y = np.array(['odd', 'even', 'even', 'odd'])
77+ x_ref = ray.put(x)
78+ y_ref = ray.put(y)
79+
80+ xy_ref = XYRef(x_ref, y_ref)
6781 """
6882
6983 def __init__ (self , Xref : ray .ObjectRef , yref : ray .ObjectRef , prev_node_state_ref : ray .ObjectRef = None , curr_node_state_ref : ray .ObjectRef = None , prev_Xyrefs = None ):
@@ -124,16 +138,47 @@ def get_prev_xyrefs(self):
124138
125139
126140class NodeInputType (Enum ):
141+ """
142+ Defines the node input types, currently, it supports an OR and AND node. An OR node is backed by an
143+ Estimator and an AND node is backed by an arbitrary lambda defined by an AndFunc. The key difference
144+ is that for an OR node, the parallelism is defined at a single XYRef object, whereas for an AND node,
145+ the parallelism is defined on a collection of objects coming "into" the AND node.
146+
147+ For details on parallelism and pipeline semantics, the reader is directed to the pipeline semantics
148+ introduction of the User guide.
149+ """
127150 OR = 0 ,
128151 AND = 1
129152
130153
131154class NodeFiringType (Enum ):
155+ """
156+ Defines the "firing" semantics of a node, there are two types of firing semantics, ANY and ALL. ANY
157+ firing semantics means that upon the availability of a single object, the node will start executing
158+ its work. Whereas, on ALL semantics, the node has to wait for ALL the objects ot be materialized
159+ before the computation can begin, i.e. it is blocking.
160+
161+ For details on firing and pipeline semantics, the reader is directed to the pipeline semantics
162+ introduction of the User guide.
163+ """
132164 ANY = 0 ,
133165 ALL = 1
134166
135167
136168class NodeStateType (Enum ):
169+ """
170+ Defines the state type of a node, there are 4 types of state, which are STATELESS, IMMUTABLE, MUTABLE_SEQUENTIAL
171+ and MUTABLE_AGGREGATE.
172+
173+ A STATELESS node is one that keeps no state and can be called any number of times without any change to the "model"
174+ or "function" state.
175+
176+ A IMMUTABLE node is one that once a model has "fitted" cannot change, i.e. there is no partial fit available.
177+
178+ A MUTABLE_SEQUENTIAL node is one that can be updated with a sequence of input object(s) or a stream.
179+
180+ A MUTABLE_AGGREGATE node is one that can be updated in batches.
181+ """
137182 STATELESS = 0 ,
138183 IMMUTABLE = 1 ,
139184 MUTABLE_SEQUENTIAL = 2 ,
@@ -156,16 +201,36 @@ def __init__(self, node_name, node_input_type: NodeInputType, node_firing_type:
156201 def __str__ (self ):
157202 return self .__node_name__
158203
159- def get_node_name (self ):
204+ def get_node_name (self ) -> str :
205+ """
206+ Returns the node name
207+
208+ :return: The name of this node
209+ """
160210 return self .__node_name__
161211
162- def get_node_input_type (self ):
212+ def get_node_input_type (self ) -> NodeInputType :
213+ """
214+ Return the node input type
215+
216+ :return: The node input type
217+ """
163218 return self .__node_input_type__
164219
165- def get_node_firing_type (self ):
220+ def get_node_firing_type (self ) -> NodeFiringType :
221+ """
222+ Return the node firing type
223+
224+ :return: The node firing type
225+ """
166226 return self .__node_firing_type__
167227
168- def get_node_state_type (self ):
228+ def get_node_state_type (self ) -> NodeStateType :
229+ """
230+ Return the node state type
231+
232+ :return: The node state type
233+ """
169234 return self .__node_state_type__
170235
171236 @abstractmethod
@@ -196,8 +261,11 @@ def __eq__(self, other):
196261
197262class EstimatorNode (Node ):
198263 """
199- Or node, which is the basic node that would be the equivalent of any SKlearn pipeline
264+ Basic estimator node, which is the basic node that would be the equivalent of any SKlearn pipeline
200265 stage. This node is initialized with an estimator that needs to extend sklearn.BaseEstimator.
266+
267+ This estimator node is typically an OR node, with ANY firing semantics, and IMMUTABLE state. For
268+ partial fit, we will have to define a different node type to keep semantics very clear.
201269 """
202270
203271 def __init__ (self , node_name : str , estimator : BaseEstimator ):
0 commit comments