@@ -353,37 +353,122 @@ class AndEstimator(BaseEstimator):
353353 method, (b) PREDICT: A regressor or classifier will call the predict method, whereas a non-regressor/classifier
354354 will call the transform method, and (c) SCORE: A regressor will call the score method, and a non-regressor/classifier
355355 will call the transform method.
356+
357+ Examples
358+ --------
359+ Here is a simple FeatureUnion as an AndEstimator:
360+
361+ .. code-block:: python
362+
363+ class FeatureUnion(dm.AndEstimator):
364+ def __init__(self):
365+ pass
366+
367+ def get_estimator_type(self):
368+ return 'transform'
369+
370+ def clone(self):
371+ return base.clone(self)
372+
373+ def fit_transform(self, xy_list):
374+ return self.transform(xy_list)
375+
376+ def transform(self, xy_list):
377+ X_list = []
378+ y_vec = None
379+ for xy in xy_list:
380+ X_list.append(xy.get_x())
381+ y_vec = xy.get_y()
382+ X_concat = np.concatenate(X_list, axis=1)
383+ return dm.Xy(X_concat, y_vec)
384+
385+ The above is doing a simple feature union by combining inputs from multiple edges and sending "back" a single
386+ concatenated xy. As a simple transform, it needs to only implement the `transform`, `fit_transform`, `clone`,
387+ and `get_estimator_type` methods.
356388 """
357389 @abstractmethod
358390 def transform (self , xy_list : list ) -> Xy :
391+ """
392+ An abstract method that needs to be implemented if a simple estimator
393+
394+ :param xy_list: List of xy
395+ :return: A single xy
396+ """
359397 raise NotImplementedError ("And estimator needs to implement a transform method" )
360398
361399 @abstractmethod
362400 def fit (self , xy_list : list ):
401+ """
402+ An abstract method that needs to be implemented if a regressor or a classifier
403+
404+ :param xy_list: List of xy
405+ :return: A single xy
406+ """
363407 raise NotImplementedError ("And estimator needs to implement a fit method" )
364408
365409 @abstractmethod
366410 def fit_transform (self , xy_list : list ):
411+ """
412+ An abstract method that needs to be implemented if a simple estimator
413+
414+ :param xy_list: List of xy
415+ :return: A single xy
416+ """
367417 raise NotImplementedError ("And estimator needs to implement a fit method" )
368418
369419 @abstractmethod
370420 def predict (self , xy_list : list ) -> Xy :
421+ """
422+ An abstract method that needs to be implemented if a regressor or a classifier
423+
424+ :param xy_list: List of xy
425+ :return: A single xy
426+ """
371427 raise NotImplementedError ("And classifier needs to implement the predict method" )
372428
373429 @abstractmethod
374430 def score (self , xy_list : list ) -> Xy :
431+ """
432+ An abstract method that needs to be implemented if a regressor or a classifier
433+
434+ :param xy_list: List of xy
435+ :return: A single xy
436+ """
375437 raise NotImplementedError ("And classifier needs to implement the score method" )
376438
377439 @abstractmethod
378440 def get_estimator_type (self ):
441+ """
442+ Every AndEstimator needs to implement this method. It returns 'transform' for a simple transformer,
443+ 'classifier' for a classifier, and 'regressor' for a regressor.
444+
445+ :return: The type of the estimator
446+ """
379447 raise NotImplementedError ("And classifier needs to implement the get_estimator_type method" )
380448
381449 @abstractmethod
382450 def clone (self ):
451+ """
452+ Abstract method that all estimators are supposed to implement. Can be as simple as returning
453+ `base.clone(self)`.
454+
455+ :return: A cloned estimator
456+ """
383457 raise NotImplementedError ("And estimator needs to implement a clone method" )
384458
385459
386460class AndNode (Node ):
461+ """
462+ Basic AND node that is capable of combining inputs from multiple edges. As such, it needs to have
463+ an AndEstimator implemented. The AndEstimator itself inherits from sklearn.BaseEstimator.
464+
465+ This estimator node is a typical AND node, with ANY firing semantics, and STATELESS state.
466+
467+ Examples
468+ --------
469+
470+
471+ """
387472 def __init__ (self , node_name : str , and_estimator : AndEstimator ):
388473 super ().__init__ (node_name , and_estimator , NodeInputType .AND , NodeFiringType .ANY , NodeStateType .STATELESS )
389474
@@ -965,25 +1050,59 @@ def get_out_args(self):
9651050
9661051class PipelineInput :
9671052 """
968- in_args is a dict from a node -> [Xy]
1053+ This is a holder to capture the input to the pipeline in an appropriate manner. Internally, it holds
1054+ a mapping from each node to a list of pointers to XYRef, i.e. it only holds pointers. It does not hold the entire
1055+ data. This is key to distributing the data in the object store.
1056+
1057+ Examples
1058+ --------
1059+ The simplest way to add input to a node is by specifying the X and y args, the underlying platform will
1060+ take care of distributing it to the backend in-memory object storage.
1061+
1062+ .. code-block:: python
1063+
1064+ pipeline_input = dm.PipelineInput()
1065+ pipeline_input.add_xy_arg(node_a, dm.Xy(X_train, y_train))
9691066 """
9701067 def __init__ (self ):
9711068 self .__in_args__ = {}
9721069
9731070 def add_xyref_ptr_arg (self , node : Node , xyref_ptr ):
1071+ """
1072+ A direct pointer input addition, this is typically used in internal methods, but enables the advanced
1073+ developer to have direct access to the pipeline internals.
1074+
1075+ :param node: Node to which input needs to be added
1076+ :param xyref_ptr: The pointer to XYref
1077+ :return: None
1078+ """
9741079 if node not in self .__in_args__ :
9751080 self .__in_args__ [node ] = []
9761081
9771082 self .__in_args__ [node ].append (xyref_ptr )
9781083
9791084 def add_xyref_arg (self , node : Node , xyref : XYRef ):
1085+ """
1086+ A convenience method that adds a XYRef to the given node as input.
1087+
1088+ :param node: Node to which input needs to be added
1089+ :param xyref: The XYRef
1090+ :return: None
1091+ """
9801092 if node not in self .__in_args__ :
9811093 self .__in_args__ [node ] = []
9821094
9831095 xyref_ptr = ray .put (xyref )
9841096 self .__in_args__ [node ].append (xyref_ptr )
9851097
9861098 def add_xy_arg (self , node : Node , xy : Xy ):
1099+ """
1100+ The most common way of adding input to a node, by providing a xy.
1101+
1102+ :param node: Node to which input needs to be added
1103+ :param xy: The xy to be added
1104+ :return: None
1105+ """
9871106 if node not in self .__in_args__ :
9881107 self .__in_args__ [node ] = []
9891108
@@ -993,12 +1112,34 @@ def add_xy_arg(self, node: Node, xy: Xy):
9931112 self .add_xyref_arg (node , xyref )
9941113
9951114 def add_all (self , node , node_inargs ):
1115+ """
1116+ Adds all the in args to a given node, this is very useful when cloning a pipeline or "morphing" it
1117+ for grid search, etc.
1118+
1119+ :param node: Node to which input needs to be added
1120+ :param node_inargs: All the in args, which will be added whole
1121+ :return: None
1122+ """
9961123 self .__in_args__ [node ] = node_inargs
9971124
9981125 def get_in_args (self ):
1126+ """
1127+ Returns the dict with the node to in args mapping
1128+
1129+ :return: The internal structure holding the in args
1130+ """
9991131 return self .__in_args__
10001132
10011133 def get_parameterized_input (self , pipeline : Pipeline , parameterized_pipeline : Pipeline ):
1134+ """
1135+ This is meant to create a parameterized input given a pipeline and the parameterized pipeline.
1136+ This method will explore the nodes from pipeline that are matching the parameterized_pipeline
1137+ and copy the input over to the appropriate nodes of the parameterized_pipeline.
1138+
1139+ :param pipeline: The original pipeline
1140+ :param parameterized_pipeline: The parameterized pipeline corresponding to the original pipeline
1141+ :return: The parameterized input for the given parameterized_pipeline
1142+ """
10021143 input_nodes = parameterized_pipeline .get_input_nodes ()
10031144 parameterized_pipeline_input = PipelineInput ()
10041145 for input_node in input_nodes :
@@ -1015,11 +1156,41 @@ def get_parameterized_input(self, pipeline: Pipeline, parameterized_pipeline: Pi
10151156
10161157
10171158class PipelineParam :
1159+ """
1160+ This class captures the pipeline parameters, which can be changed for various forms of exploration.
1161+ It is a fairly simple holder class capturing, for each node, the corresponding estimator's parameters
1162+ as a dictionary.
1163+
1164+ It also supports creating a PipelineParam object from a parameter grid, typically used in
1165+ sklearn.GridSearchCV.
1166+
1167+ Examples
1168+ --------
1169+ A simple example to create a pipeline param from a parameter grid.
1170+
1171+ .. code-block:: python
1172+
1173+ param_grid = {
1174+ 'pca__n_components': [5, 15, 30, 45, 64],
1175+ 'logistic__C': np.logspace(-4, 4, 4),
1176+ }
1177+
1178+ pipeline_param = dm.PipelineParam.from_param_grid(param_grid)
1179+ """
10181180 def __init__ (self ):
10191181 self .__node_name_param_map__ = {}
10201182
10211183 @staticmethod
10221184 def from_param_grid (fit_params : dict ):
1185+ """
1186+ A method to create a pipeline param object from a typical parameter grid with the standard
1187+ sklearn convention of __. For example, `pca__n_components` is a parameter for node name pca
1188+ and the parameter name is n_components. The parameter grid creates a full grid exploration of
1189+ the parameters.
1190+
1191+ :param fit_params: Dictionary of parameter name in the sklearn convention to the parameter list
1192+ :return: A pipeline param object
1193+ """
10231194 pipeline_param = PipelineParam ()
10241195 fit_params_nodes = {}
10251196 for pname , pval in fit_params .items ():
@@ -1048,10 +1219,28 @@ def from_param_grid(fit_params: dict):
10481219 return pipeline_param
10491220
10501221 def add_param (self , node_name : str , params : dict ):
1222+ """
1223+ Add a parameter to the given node name
1224+
1225+ :param node_name: Node name to add parameter to
1226+ :param params: Parameter as a dictionary
1227+ :return: None
1228+ """
10511229 self .__node_name_param_map__ [node_name ] = params
10521230
10531231 def get_param (self , node_name : str ):
1232+ """
1233+ Returns the parameter dict for the given node name
1234+
1235+ :param node_name: Node name to retrieve parameters for
1236+ :return: Dict of parameters
1237+ """
10541238 return self .__node_name_param_map__ [node_name ]
10551239
10561240 def get_all_params (self ):
1241+ """
1242+ Return all the parameters for the given pipeline param
1243+
1244+ :return: A dict from node name to the dictionary of parameters
1245+ """
10571246 return self .__node_name_param_map__
0 commit comments