In [None]:
#hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#default_exp dataflow

In [None]:
#export

import time
from typing import Optional

from nifi_api.rest import Flowfiles, Processor

# Dataflow

> Monitors and controls a NiFi dataflow.

In [None]:
#export


class DataFlow:
    """
    Monitors and controls a NiFi dataflow. The dataflow starts
    when the **run** method is called.

    Parameters
    ----------
    dataflow_ids : DataFlowIds
        Data structure that contains all the IDs of the in/out
        processors and connections. It must expose the attributes
        ``in_processor``, ``in_connection``, ``middle_processor``,
        ``out_processor`` and ``out_connection``.
    delay_seconds_after_start : int, default 14
        Seconds to wait after starting the input processor before the
        flowfile IDs of the input connection are read.
    delay_seconds_between_checks : int, default 15
        Seconds to sleep between two consecutive comparisons of the
        input and output flowfiles.
    max_wait_seconds : float, optional
        Maximum number of seconds **run** keeps polling for the output
        flowfiles to match the input flowfiles. ``None`` (the default)
        polls without a time limit.
    """

    def __init__(
        self,
        dataflow_ids: object,
        delay_seconds_after_start: int = 14,
        delay_seconds_between_checks: int = 15,
        max_wait_seconds: Optional[float] = None,
    ) -> None:
        self.in_processor = Processor(dataflow_ids.in_processor)
        self.in_flowfiles = Flowfiles(dataflow_ids.in_connection)
        self.middle_processor = Processor(dataflow_ids.middle_processor)
        self.out_processor = Processor(dataflow_ids.out_processor)
        self.out_flowfiles = Flowfiles(dataflow_ids.out_connection)

        self.seconds_after_start = delay_seconds_after_start
        self.seconds_between_checks = delay_seconds_between_checks
        self.max_wait_seconds = max_wait_seconds

    def run(self) -> None:
        """
        Runs the dataflow once and blocks until it has finished.

        The steps are:

        1. Stop the output processor and start the input processor.
        2. Wait ``delay_seconds_after_start`` seconds and read the
           flowfile IDs of the input connection.
        3. Start the middle processor and stop the input processor.
        4. Poll the output connection every
           ``delay_seconds_between_checks`` seconds until its flowfiles
           equal the ones read in step 2, then stop the middle
           processor and start the output processor.

        Raises
        ------
        TimeoutError
            If ``max_wait_seconds`` was given and the output flowfiles
            still do not match the input flowfiles once that time has
            elapsed. The run status of the processors is left as it is
            at that moment.
        """

        print('pipeline watching has started..')

        self.out_processor.update_run_status("STOPPED")
        self.in_processor.update_run_status("RUNNING")
        time.sleep(self.seconds_after_start)
        # Snapshot of the input connection taken before the middle
        # processor starts; it is the reference for the comparison below.
        self.in_flowfiles.get_ids()
        self.middle_processor.update_run_status("RUNNING")
        self.in_processor.update_run_status("STOPPED")

        # The monotonic clock is immune to wall-clock adjustments.
        deadline = None
        if self.max_wait_seconds is not None:
            deadline = time.monotonic() + self.max_wait_seconds

        while True:

            self.out_flowfiles.get_ids()

            if self.in_flowfiles.equals(self.out_flowfiles):

                self.middle_processor.update_run_status("STOPPED")
                self.out_processor.update_run_status("RUNNING")
                print("Pipeline watching has finished ...")
                break

            # The deadline is only evaluated after a failed check, so at
            # least one comparison is always made.
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    "Output flowfiles did not match the input flowfiles "
                    f"within {self.max_wait_seconds} seconds."
                )
            time.sleep(self.seconds_between_checks)

In [None]:
# Test
# Manual check that uses the group processor *Test API* in the Cloudera
# session.

# 1. Before running, turn on the "Initial" and "Middle" processors and
#    turn off the "Body" and "Final" processors.

# 2. Build the data structure with the connection and processor IDs.
from nifi_api.environment import DataFlowIds

ids = dict(
    in_connection=dict(
        Id="cc549c6e-0177-1000-ffff-ffffb5d2aba2",
        name="First",
    ),
    out_connection=dict(
        Id="51ab3b24-084f-1309-0000-00001946f2c7",
        name="Final",
    ),
    in_processor=dict(
        Id="36c62ad6-d606-3b04-9743-d77b6249608c",
        name="First",
    ),
    middle_processor=dict(
        Id="cc54862f-0177-1000-ffff-ffffe7325a20",
        name="Middle",
    ),
    out_processor=dict(
        Id="51ab3b1e-084f-1309-a135-aa0100d7186b",
        name="Final",
    ),
)
data_ids = DataFlowIds(ids)

# 3. Instantiate the DataFlow class with 10-second delays.
test_dataflow = DataFlow(
    dataflow_ids=data_ids,
    delay_seconds_after_start=10,
    delay_seconds_between_checks=10,
)

# 4. Call the run method. The following events must happen, in order:
#  - The "Final" processor turns off and the "First" processor turns on.
#  - The "First" processor turns off and the "Middle" processor turns on.
#  - The "Middle" processor turns off and the "Final" processor turns on.
test_dataflow.run()

Pipeline watching has finished ...


In [None]:
#hide
# Export the project's notebooks to their Python modules with nbdev.
from nbdev.export import notebook2script

notebook2script()

Converted 01_environment.ipynb.
Converted 02_rest.ipynb.
Converted 03_dataflow.ipynb.
Converted 04_source_to_refined.ipynb.
Converted 09_tools.ipynb.
Converted index.ipynb.
