In [1]:
#hide
%load_ext autoreload
%autoreload 2

In [9]:
#default_exp dataflow

In [10]:
#export

import time
from typing import Optional

from nifi_api.environment import Vars, NifiIds
from nifi_api.rest import Flowfiles, Processor

# Dataflow

> Monitors and controls a NiFi dataflow.

In [11]:
#export


class DataFlow:
    """
    Monitors and controls a NiFi dataflow. The dataflow starts
    when the **run** method is called.

    Parameters
    ----------
    dataFlowIds : DataFlowIds
        Data structure that contains all the IDs of the in/out
        processors and connections. It must expose the attributes
        ``in_processor``, ``in_connection``, ``middle_processor``,
        ``out_processor`` and ``out_connection``.
    """

    def __init__(
        self,
        dataFlowIds: object,
    ) -> None:
        """Builds the REST wrappers for the three processors and two connections."""
        self.in_processor = Processor(dataFlowIds.in_processor)
        self.in_flowfiles = Flowfiles(dataFlowIds.in_connection)
        self.middle_processor = Processor(dataFlowIds.middle_processor)
        self.out_processor = Processor(dataFlowIds.out_processor)
        self.out_flowfiles = Flowfiles(dataFlowIds.out_connection)

    def run(self, timeout: Optional[float] = None) -> None:
        """
        Starts the dataflow and blocks until the out connection matches
        the in connection.

        Sequence:

        1. Set the out processor to ``STOPPED`` and the in processor to
           ``RUNNING``, then wait ``Vars.seconds_after_start`` seconds.
        2. Call ``get_ids()`` on the in connection, set the middle
           processor to ``RUNNING`` and the in processor to ``STOPPED``.
        3. Poll the out connection every ``Vars.seconds_between_checks``
           seconds until ``in_flowfiles.equals(out_flowfiles)`` is true.
        4. Set the middle processor to ``STOPPED`` and the out processor
           to ``RUNNING``.

        Parameters
        ----------
        timeout : float, optional
            Maximum number of seconds to spend in the polling phase
            (step 3). ``None`` (the default) waits indefinitely, which
            is the historical behaviour.

        Raises
        ------
        TimeoutError
            If ``timeout`` is given and expires before the connections
            match. The processors are left untouched in that case (the
            middle processor is still ``RUNNING`` and the out processor
            is still ``STOPPED``) so the caller decides how to recover.
        """
        self.out_processor.update_run_status("STOPPED")
        self.in_processor.update_run_status("RUNNING")
        time.sleep(Vars.seconds_after_start)
        self.in_flowfiles.get_ids()
        self.middle_processor.update_run_status("RUNNING")
        self.in_processor.update_run_status("STOPPED")

        # A monotonic clock keeps the deadline immune to wall-clock changes.
        deadline = None if timeout is None else time.monotonic() + timeout

        while True:
            self.out_flowfiles.get_ids()

            if self.in_flowfiles.equals(self.out_flowfiles):
                self.middle_processor.update_run_status("STOPPED")
                self.out_processor.update_run_status("RUNNING")
                print("Pipeline watching has finished ...")
                break

            # Check the deadline before sleeping so that at least one poll is
            # always made and no sleep starts after the deadline has passed.
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Dataflow did not finish within {timeout} seconds"
                )

            time.sleep(Vars.seconds_between_checks)

In [12]:
# Test
# Uses the group processor *Test API* in the Cloudera session.

# 1. Turn off all the processors except processor "Four".

In [13]:
# 3. Generate the data structure with the
#    connection and processor IDs

from nifi_api.environment import DataFlowIds
# One (role, Id, name) triple per connection/processor; the comprehension
# expands each triple into the {"Id": ..., "name": ...} entry for its role.
pipeline = {
    role: {"Id": component_id, "name": label}
    for role, component_id, label in (
        ("in_connection", "cc549c6e-0177-1000-ffff-ffffb5d2aba2", "One -- Two"),
        ("out_connection", "51ab3b24-084f-1309-0000-00001946f2c7", "Three -- Four"),
        ("in_processor", "36c62ad6-d606-3b04-9743-d77b6249608c", "One"),
        ("middle_processor", "cc54862f-0177-1000-ffff-ffffe7325a20", "Two"),
        ("out_processor", "51ab3b1e-084f-1309-a135-aa0100d7186b", "Four"),
    )
}
test_data_ids = DataFlowIds(pipeline)

In [14]:
# 4. Instantiate the DataFlow class and call the run method (next
#    cell). While run() is polling, check that processor "Two" has
#    turned ON and that processors "One" and "Four" have turned OFF.
test_dataflow = DataFlow(test_data_ids)

In [None]:
# run() blocks: it polls every Vars.seconds_between_checks seconds until
# in_flowfiles.equals(out_flowfiles) is true, so this cell is expected
# to return only after step 5 below has been carried out.
test_dataflow.run()

In [None]:
# 5. Finally, turn ON the processor "Three". Check that the
# **run** method has exited and processors "Two" and
# "Four" are OFF and ON, respectively.

# 6. End of the test

In [2]:
#hide
from nbdev.export import notebook2script

# Run the nbdev export; it prints one "Converted <notebook>" line per notebook.
notebook2script()

Converted 01_environment.ipynb.
Converted 02_rest.ipynb.
Converted 03_dataflow.ipynb.
Converted 04_source_to_refined.ipynb.
Converted 09_tools.ipynb.
Converted index.ipynb.
