From edff96b60337a651a7c1c68d1e2ce806c7988c55 Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Mon, 27 Feb 2017 15:14:01 +0000
Subject: [PATCH 01/12] More complex quick start

---
 quick_start/index.rst    |   1 +
 quick_start/module.rst   |   2 +-
 quick_start/more.rst     | 278 +++++++++++++++++++++++++++++++++++++++
 quick_start/starting.rst |   4 -
 quick_start/workflow.rst |   2 +-
 5 files changed, 281 insertions(+), 6 deletions(-)
 create mode 100644 quick_start/more.rst
 delete mode 100644 quick_start/starting.rst

diff --git a/quick_start/index.rst b/quick_start/index.rst
index 85adf00..dbe06a2 100644
--- a/quick_start/index.rst
+++ b/quick_start/index.rst
@@ -16,3 +16,4 @@ Make sure you have :doc:`installed NStack ` and let's get going.
 
    module
    workflow
+   more

diff --git a/quick_start/module.rst b/quick_start/module.rst
index 8679f94..ec8b24d 100644
--- a/quick_start/module.rst
+++ b/quick_start/module.rst
@@ -1,6 +1,6 @@
 .. _module:
 
-Writing your Module
+Building a Module
 =========================
 
 NStack Modules contain the methods that can be used on the NStack platform. They are the building blocks which can be used to build workflows and applications.

diff --git a/quick_start/more.rst b/quick_start/more.rst
new file mode 100644
index 0000000..fc71bcd
--- /dev/null
+++ b/quick_start/more.rst
@@ -0,0 +1,278 @@
.. _more:

Writing Your Own Module
========================

So far, we have built and published a Python module with a single method on it, ``numChars``, and built a workflow which connects our method to an HTTP endpoint. This in itself isn't particularly useful, so, now that you've got the gist of how NStack works, it's time to build something more realistic!

In this tutorial, we're going to create and productionise a simple classifier which uses the famous `iris dataset `_.

We're going to train our classifier to classify which species an iris is, given measurements of its sepals and petals. You can find the dataset we're using to train our model `here `_.

First, let's look at the format of our data to see how we should approach the problem. We see that we have five fields:

================ ======================= ===========
Field Name       Description             Type
================ ======================= ===========
``species``      The species of iris     Text

``sepal_width``  The width of the sepal  Double

``sepal_length`` The length of the sepal Double

``petal_width``  The width of the petal  Double

``petal_length`` The length of the petal Double
================ ======================= ===========

If we are trying to find the species based on the sepal and petal measurements, this means these measurements are going to be the input to our classifier module, with text being the output. This means we need to write a method in Python which takes four ``Double``\s and returns ``Text``.

## Creating your module

To begin, let's make a new directory called ``irisclassify``, ``cd`` into it, and initialise a new module:

.. code:: bash

   ~/ $ mkdir irisclassify; cd irisclassify
   ~/irisclassify/ $ nstack init python
   python module 'irisclassify' successfully initialised at ~/irisclassify

Next, let's download our training data into this so we can use it in our module.

.. 
code:: bash

   ~/irisclassify/ $ curl -O https://raw.githubusercontent.com/nstackcom/nstack-examples/master/iris/irisclassify/train.csv


Defining our API
****************

As we know what the input and output of our classifier are going to look like, let's edit the ``api`` section of ``nstack.yaml`` to define our API (i.e. the entry-point into our module). By default a new module contains a sample method ``numChars``, which we can replace with our definition. We're going to call the method we write in Python ``predict``, which means we can fill in the ``api`` section of ``nstack.yaml`` as follows:

.. code :: java

   api : |
     interface Default {
       predict : (Double, Double, Double, Double) -> Text
     }

This means we are exposing a single method ``predict``, which takes a record of four ``Double``\s (the measurements) and returns ``Text`` (the iris species).


Writing our classifier
**********************

Now that we've defined our API, let's jump into our Python module, which lives in ``service.py``.
We see that NStack has created a class ``Service``. This is where we add the methods for our module. Right now it also has a sample method in it, ``numChars``, which we can remove.


Firstly, let's import the libraries we're using.

.. code :: python

   import nstack
   import pandas as pd

   from sklearn.ensemble import RandomForestClassifier

.. note :: Python modules must also import ``nstack``

Before we add our ``predict`` method, we're going to add ``__init__``, the Python constructor method which runs upon the creation of our module. It's going to load our data from ``train.csv``, and use it to train our Random Forest classifier:

.. code :: python

   def __init__(self):
       # Load the training data and fit the classifier once, when the module starts
       train = pd.read_csv("train.csv")

       # The four measurement columns are the features; the species column is the label
       self.cols = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']
       colsRes = ['class']
       trainArr = train.as_matrix(self.cols)
       trainRes = train.as_matrix(colsRes)

       rf = RandomForestClassifier(n_estimators=100)
       rf.fit(trainArr, trainRes)
       self.rf = rf

Now we can write our ``predict`` method. The second argument, ``inputArr``, is the input -- in this case, our four ``Double``\s. To return text, we simply return from the method in Python.

.. code :: python

   def predict(self, inputArr):
       # Wrap the four measurements in a single-row DataFrame and classify it
       points = [inputArr]
       df = pd.DataFrame(points, columns=self.cols)

       results = self.rf.predict(df)
       return results.item()

Configuration
*************

When your module is started, it is run in a Linux container on the NStack server. Because our module uses libraries like ``pandas`` and ``sklearn``, we have to tell NStack to install some extra operating system libraries inside your module's container. NStack lets us specify these in our ``nstack.yaml`` configuration file in the ``packages`` section. Let's add the following packages:

.. code :: yaml

   packages: ['numpy', 'python3-scikit-learn.x86_64', 'scipy', 'python3-scikit-image.x86_64', 'python3-pandas.x86_64']

Additionally, we want to tell NStack to copy our ``train.csv`` file into our module, so we can use it to train our classifier. ``nstack.yaml`` also has a section for specifying files you'd like to include:

.. code :: yaml

   files: ['train.csv']


Publishing and Starting
***********************

Now we're ready to build and publish our classifier. Remember, even though we run this command locally, our module gets built and published to your NStack Server.

.. code :: bash

   ~/irisclassify/ $ nstack build
   Building NStack Container module irisclassify. 
Please wait. This may take some time. + Module irisclassify built successfully. Use `nstack list methods` to see all available methods. + +We can see our method, ``irisclassify.predict``. Including our ``demo.numChars`` method from the previous tutorial, we should now have two: + +.. code :: bash + + ~/irisclassify/ $ nstack list methods + irisclassify.predict : (Double, Double, Double, Double) -> Text + demo.numChars : Text -> Integer + +Our classifier is now published, but to use it we need to connect it to an event-source and sink. In the previous tutorial, we used HTTP as a source, and the NStack log as a sink. We can do the same here by starting the following workflow. + +.. code :: bash + + ~/irisclassify/ $ nstack start "source(http:///irisendpoint : (Double, Double, Double, Double)) | irisclassify.predict | sink(log:// : Text)" + +This creates an HTTP endpoint on ``http://localhost:8080/irisendpoint`` which can receive four ``Double``\s, and writes the results to the log as ``Text``. We can test our classifier by sending it some of the sample data from ``train.csv``: + +.. code :: bash + + ~/irisclassify/ $ curl -X PUT -d '{ "params" : [4.7, 1.4, 6.1, 2.9] }' localhost:8080/irisendpoint + Success + ~/irisclassify/ $ nstack log 2 + Feb 17 10:32:30 nostromo nstack-server[8925]: OUTPUT: "Iris-versicolor" + +Great! Our classifiier is now productionised. + +Other Sources and Sinks +*********************** + +So far we have used HTTP as a source, and the log as a sink, but NStack supports many other integrations. For instance, we can connect our classifier to use a database as a source and/or a sink by using the ``postgresql`` intergration: + +.. code :: bash + + ~/irisclassify/ $ nstack start "source(postgresql://foo:bar@database.contoso.com/flowers?query=SELECT%20*%20FROM%20iris : (Double, Double, Double, Double) | irisclassify.predict | sink(postgresql://foo:bar@database.contoso.com?table=flowers : Text)" + +In this case, NStack will ensure that the database is of the correct schema. + +.. note :: See all available integrations at :ref:`Supported Integrations ` + +More Powerful Workflows +*********************** + +So far, we've composed workflows out of a source, a sink, and a single method, but workflows can contain as many steps as you like, as long as the output type of one matches the input type of the other. For instance, let's add our ``demo.numChars`` method from the previous tutorial to our workflow. From listing the available methods above, we see that it takes ``Text`` and returns ``Integer``. Because our ``irisclassify.predict`` method returns ``Text``, this means we can connect -- or `compose` -- them together. + +.. note :: ``numChars`` and ``predict`` can be `composed` together because their types -- or schemas -- match. If ``predict`` wasn't configured to output ``Text``, or ``numChars`` wasn't configured to take ``Text`` as input, NStack would not let you build the following workflow. + +.. code :: bash + + ~/irisclassify/ $ nstack start "source(http:///irisendpoint : (Double, Double, Double, Double)) | irisclassify.predict | demo.numChars | sink(log:// : Integer)" + +Although you can write workflows directly in the ``start`` command, as we have above, NStack provides a more powerful way to build workflows that allows them to be re-used, shared, and composed together. +All of the workflows that are started with the ``start`` command have to be `fully composed`, which means that they contain a source, one or more modules, and a sink. 
Many times, you may want to write a workflow which is only `partially composed`; for instance, it contains only modules, is a combination of a source and a module, or is a combination of a module and a sink. These workflows cannot be run by themselves, but can be shared and attached to other sources, sinks, or modules when they are started.

For instance, we could combine ``irisclassify.predict`` and ``demo.numChars`` to form a new workflow ``speciesLength`` like so:

.. code :: java

   def speciesLength = irisclassify.predict | demo.numChars

To build workflows like this, we create them as modules in a similar way we created a Python module -- with ``init``. Let's create a new directory called ``irisworkflow``, ``cd`` into it, and create a new workflow module.

.. code :: bash

   ~/ $ mkdir irisworkflow; cd irisworkflow
   ~/irisworkflow/ $ nstack init workflow
   Workflow module 'irisworkflow' successfully initialised at /var/home/fedora/irisworkflow

Instead of creating an ``nstack.yaml``, this creates a single file, ``workflow.nml``, which contains our workflow module.

.. code :: java

   module irisworkflow {
      // A sample workflow
      def w = source(http:///s : Text) | Module1.numChars | sink(log:// : Integer)
   }

You will notice that the module itself is named ``irisworkflow`` after the directory name, and has an example workflow in it, ``w``. We're going to replace this with our ``speciesLength`` workflow above.

.. code :: java

   module irisworkflow {
      // Our speciesLength workflow
      def speciesLength = irisclassify.predict | demo.numChars
   }

As with other modules, we can now build ``irisworkflow`` with the ``build`` command:

.. code :: bash

   ~/irisworkflow/ $ nstack build
   Building NStack Workflow module irisworkflow.
   Workflow module irisworkflow built successfully.

Because our workflow ``irisworkflow.speciesLength`` has not been connected to a source or a sink, it is technically a method and is treated as such. This means we can see it alongside our other methods:

.. code :: bash

   ~/irisworkflow/ $ nstack list methods
   irisclassify.predict : (Double, Double, Double, Double) -> Text
   demo.numChars : Text -> Integer
   irisworkflow.speciesLength : (Double, Double, Double, Double) -> Integer

Note that the input type of the workflow is the input type of ``irisclassify.predict``, and the output type is the output type of ``demo.numChars``. Like other methods, this can be connected to a source and a sink to make it `fully composed`:

.. code :: bash

   ~/irisworkflow/ $ nstack start "source(http:///speciesLength : (Double, Double, Double, Double)) | irisworkflow.speciesLength | sink(log:// : Integer)"

Alternatively, you can move the source and sink into the ``workflow.nml`` file:

.. code :: java

   module irisworkflow {
      def completeWorkflow = source(http:///speciesLength : (Double, Double, Double, Double)) | irisworkflow.speciesLength | sink(log:// : Integer)
   }

If you ``build`` this, you can then start it by itself with the ``start`` command, because it's fully composed:

.. code :: bash

   ~/irisworkflow/ $ nstack start irisworkflow.completeWorkflow

This paradigm can be helpful when we apply it to sources and sinks. Oftentimes, you -- or someone else in your company -- will want to create sources and sinks which are combined with modules, for instance in the following fictional example:

.. 
code :: java

   module customerRecords {
      def cleanSource = source(postgresql://foo:bar@database.contoso.com/customers?query=SELECT * FROM customer_records : CustomerRecord) | DataTools.cleanCustomerRecord;
      def cleanSink = DataTools.ensureValidCustomer | sink(postgresql://foo:bar@database.contoso.com/customers?table=customer_records : CustomerRecord);
   }

Preconfigured sources and sinks can be used in workflows without requiring the user to be familiar with the configuration of the source and sink.
This becomes useful when you are connecting to more complex middleware (such as streams and message queues), which those building modules and workflows may not need to understand or want to configure. Additionally, it allows sources and sinks to be created securely, without the need to share credentials with those building workflows. The user will simply receive a stream of ``CustomerRecord``, or be able to output a ``CustomerRecord``.
In this example, we are also adding a module to each to do some processing before and after.

NStack knows that ``cleanSource`` is still a source because it doesn't have a sink attached. Similarly, NStack knows that ``cleanSink`` is a sink, because it doesn't have a source. This means you can find them in your list of sources and sinks using ``list``, and they can be used like any other source and sink, for instance:

.. code :: bash

   ~/ $ nstack start "customerRecords.cleanSource | customerClassifier.predict | customerRecords.cleanSink"

diff --git a/quick_start/starting.rst b/quick_start/starting.rst
deleted file mode 100644
index bba6695..0000000
--- a/quick_start/starting.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-.. _starting:
-
-Starting your Workflow
-=========================
diff --git a/quick_start/workflow.rst b/quick_start/workflow.rst
index 45e6531..e3c9658 100644
--- a/quick_start/workflow.rst
+++ b/quick_start/workflow.rst
@@ -1,6 +1,6 @@
 .. _workflow:
 
-Building your Workflow
+Building a Workflow
 =========================
 
 In the previous tutorial, we built and published a Python module using NStack.

From 7b8765dec4cf7c17f3873129a42bb6e9468909bc Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Mon, 6 Mar 2017 14:47:01 +0000
Subject: [PATCH 02/12] Merge new changes

---
 quick_start/workflow.rst             | 20 ++++----
 reference/supported_integrations.rst | 69 ++++++++++++++++++++++++----
 2 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/quick_start/workflow.rst b/quick_start/workflow.rst
index e3c9658..17ef747 100644
--- a/quick_start/workflow.rst
+++ b/quick_start/workflow.rst
@@ -29,29 +29,29 @@ In full, our workflow is going to look like this:
 
 .. code:: bash
 
-   source(http:///demo : Text) | demo.numChars | sink(log:// : Integer)
+   sources.http : Text { http_path = "/demo" } | demo.numChars | sinks.log : Integer
 
 NStack uses the ``|`` operator to connect statements together, just like in a shell such as ``bash``. We use it to connect together the parts to form our workflow.
 
 Let's break these parts down to see what we're doing:
 
-======================================= ===========
-Part                                    Description
-======================================= ===========
-``source(http:///demo : Text)``          Use ``http`` as a source, which creates an endpoint on ``/demo``. The ``Text`` statement means it can only accept and pass on Text. 
+=============================================== ===========
+Part                                            Description
+=============================================== ===========
+``sources.http : Text { http_path = "/demo" }`` Use ``http`` as a source, which creates an endpoint on ``/demo``. The ``Text`` statement means it can only accept and pass on Text.
 
-``demo.numChars``                       The name of the method which we built.
+``demo.numChars``                               The name of the method which we built.
 
-``sink(log:// : Integer)``              Use NStack's log as a sink. The ``Integer`` statement means it can only accept Integers.
-======================================= ===========
+``sinks.log : Integer``                         Use NStack's log as a sink. The ``Integer`` statement means it can only accept Integers.
+=============================================== ===========
 
 To start this workflow with NStack, we use NStack's ``start`` command:
 
 .. code:: bash
 
-   > nstack start "source(http:///demo : Text) | demo.numChars | sink(log:// : Integer)"
-   Started source(http:///demo : Text) | demo.numChars | sink(log:// : Integer) as process 1
+   > nstack start 'sources.http : Text { http_path = "/demo" } | demo.numChars | sinks.log : Integer'
+   Started sources.http : Text { http_path = "/demo" } | demo.numChars | sinks.log : Integer as process 1
 
 We now have a live HTTP endpoint on ``localhost:8080/demo``, running as process ``1`` on NStack. The HTTP endpoint is configured to accept JSON-encoded values. We defined it to use an input schema of ``Text``, so we will be able to send it any JSON ``string``. In our JSON, we put ``params`` as the key, and our input as the value:

diff --git a/reference/supported_integrations.rst b/reference/supported_integrations.rst
index c642198..cf328cf 100644
--- a/reference/supported_integrations.rst
+++ b/reference/supported_integrations.rst
@@ -8,14 +8,67 @@ NStack is built to integrate with existing infrastructure, event, and data-sourc
 
 .. seealso:: Learn more about *sources* and *sinks* in :ref:`Concepts`
 
 **Sources**
-  - Postgres
-  - HTTP
-  - RabbitMQ (AMQP)
-  - Kafka
+  - Postgres ::
+
+      sources.postgres : Text {
+        pg_host = "localhost", pg_port = "5432",
+        pg_user = "user", pg_password = "123456",
+        pg_database = "db", pg_query = "SELECT * FROM tbl;" }
+
+    ``pg_port`` defaults to 5432, ``pg_user`` defaults to ``postgres``, and
+    ``pg_password`` defaults to the empty string. The other parameters are mandatory.
+
+  - HTTP ::
+
+      sources.http : Text { http_path = "/foo" }
+
+  - RabbitMQ (AMQP) ::
+
+      sources.amqp : Text {
+        amqp_host = "localhost", amqp_port = "5672",
+        amqp_vhost = "/", amqp_exchange = "ex",
+        amqp_key = "key"
+      }
+
+    ``amqp_port`` defaults to 5672 and ``amqp_vhost`` defaults to ``/``.
+    The other parameters are mandatory.
 
 **Sinks**
-  - Postgres
-  - NStack Log
-  - RabbitMQ (AMQP)
-  - Firebase
+  - Postgres ::
+
+      sinks.postgres : Text {
+        pg_host = "localhost", pg_port = "5432",
+        pg_user = "user", pg_password = "123456",
+        pg_database = "db", pg_table = "tbl" }
+
+    As with the Postgres source,
+    ``pg_port`` defaults to 5432, ``pg_user`` defaults to ``postgres``, and
+    ``pg_password`` defaults to the empty string. The other parameters are mandatory.
+
+  - NStack Log ::
+
+      sinks.log : Text
+
+    The Log sink takes no parameters.
+
+  - RabbitMQ (AMQP) ::
+
+      sinks.amqp : Text {
+        amqp_host = "localhost", amqp_port = "5672",
+        amqp_vhost = "/", amqp_exchange = "ex",
+        amqp_key = "key"
+      }
+
+    As with the AMQP source,
+    ``amqp_port`` defaults to 5672 and ``amqp_vhost`` defaults to ``/``.
+    The other parameters are mandatory. 
+
+  - Firebase ::
+
+      sinks.firebase {
+        firebase_host = "localhost",
+        firebase_port = "111",
+        firebase_path = "..."
+      }
+
+    All parameters are mandatory.

From d9dd9f7a6d87ac4da9a1a0f89c8bf2e119fd440f Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Tue, 7 Mar 2017 18:15:44 +0000
Subject: [PATCH 03/12] Update to Roman's syntax

---
 quick_start/more.rst | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/quick_start/more.rst b/quick_start/more.rst
index fc71bcd..5964477 100644
--- a/quick_start/more.rst
+++ b/quick_start/more.rst
@@ -27,7 +27,8 @@ If we are trying to find the species based on the sepal and petal measurements, this means these measurements are going to be the input to our classifier module, with text being the output. This means we need to write a method in Python which takes four ``Double``\s and returns ``Text``.
 
-## Creating your module
+Creating your module
+*******************
 
 To begin, let's make a new directory called ``irisclassify``, ``cd`` into it, and initialise a new module:
 
@@ -143,7 +144,7 @@ Our classifier is now published, but to use it we need to connect it to an event
 
 .. code :: bash
 
-   ~/irisclassify/ $ nstack start "source(http:///irisendpoint : (Double, Double, Double, Double)) | irisclassify.predict | sink(log:// : Text)"
+   ~/irisclassify/ $ nstack start 'sources.http : (Double, Double, Double, Double) { http_path = "/irisendpoint" } | irisclassify.predict | sinks.log : Text'
 
 This creates an HTTP endpoint on ``http://localhost:8080/irisendpoint`` which can receive four ``Double``\s, and writes the results to the log as ``Text``. We can test our classifier by sending it some of the sample data from ``train.csv``:
 
@@ -178,7 +179,7 @@ So far, we've composed workflows out of a source, a sink, and a single method, b
 
 .. code :: bash
 
-   ~/irisclassify/ $ nstack start "source(http:///irisendpoint : (Double, Double, Double, Double)) | irisclassify.predict | demo.numChars | sink(log:// : Integer)"
+   ~/irisclassify/ $ nstack start 'sources.http : (Double, Double, Double, Double) { http_path = "/irisendpoint" } | irisclassify.predict | demo.numChars | sink : Integer'
 
 Although you can write workflows directly in the ``start`` command, as we have above, NStack provides a more powerful way to build workflows that allows them to be re-used, shared, and composed together.
 All of the workflows that are started with the ``start`` command have to be `fully composed`, which means that they contain a source, one or more modules, and a sink. Many times, you may want to write a workflow which is only `partially composed`; for instance, it contains only modules, is a combination of a source and a module, or is a combination of a module and a sink. These workflows cannot be run by themselves, but can be shared and attached to other sources, sinks, or modules when they are started.
 
@@ -203,7 +204,7 @@ Instead of creating an ``nstack.yaml``, this creates a single file, ``workflow.n
 
    module irisworkflow {
      // A sample workflow
-      def w = source(http:///s : Text) | Module1.numChars | sink(log:// : Integer)
+      def w = sources.http : Text { http_path = "/s" } | Module1.numChars | sinks.log : Integer
    }
 
 You will notice that the module itself is named ``irisworkflow`` after the directory name, and has an example workflow in it, ``w``. We're going to replace this with our ``speciesLength`` workflow above.
 
@@ -236,14 +237,14 @@ Note that the input type of the workflow is the input type of ``irisclassify.pre
 
 .. 
code :: bash
 
-   ~/irisworkflow/ $ nstack start "source(http:///speciesLength : (Double, Double, Double, Double)) | irisworkflow.speciesLength | sink(log:// : Integer)"
+   ~/irisworkflow/ $ nstack start 'src.http : (Double, Double, Double, Double) { http_path = "speciesLength" } | irisworkflow.speciesLength | sink.log : Integer'
 
 Alternatively, you can move the source and sink into the ``workflow.nml`` file:
 
 .. code :: java
 
    module irisworkflow {
-      def completeWorkflow = source(http:///speciesLength : (Double, Double, Double, Double)) | irisworkflow.speciesLength | sink(log:// : Integer)
+      def completeWorkflow = src.http : (Double, Double, Double, Double) { http_path = "speciesLength" } | irisworkflow.speciesLength | sink.log : Integer;
    }
 
 If you ``build`` this, you can then start it by itself with the ``start`` command, because it's fully composed:
 
@@ -257,7 +258,7 @@ This paradigm can be helpful when we apply it to sources and sinks. Oftentimes, 
 
 .. code :: java
 
    module customerRecords {
-      def cleanSource = source(postgresql://foo:bar@database.contoso.com/customers?query=SELECT * FROM customer_records : CustomerRecord) | DataTools.cleanCustomerRecord;
+      def cleanSource = source.postgres { postgres_username = "foo", postgres_password = "bar" @database.contoso.com/customers?query=SELECT * FROM customer_records : CustomerRecord) | DataTools.cleanCustomerRecord;
       def cleanSink = DataTools.ensureValidCustomer | sink(postgresql://foo:bar@database.contoso.com/customers?table=customer_records : CustomerRecord);
   }

From 7b9187823efd05ae3eeae7a29bbae59488b52249 Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Thu, 9 Mar 2017 10:41:57 +0000
Subject: [PATCH 04/12] Split up more complex examples, integrate nick's comments on #17, add more content for partial workflows, add new syntax

---
 quick_start/index.rst          |   1 +
 quick_start/more.rst           | 136 +++--------------------------
 quick_start/workflow_power.rst | 155 +++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+), 126 deletions(-)
 create mode 100644 quick_start/workflow_power.rst

diff --git a/quick_start/index.rst b/quick_start/index.rst
index dbe06a2..b53efb1 100644
--- a/quick_start/index.rst
+++ b/quick_start/index.rst
@@ -17,3 +17,4 @@
    module
    workflow
    more
+   workflow_power

diff --git a/quick_start/more.rst b/quick_start/more.rst
index 5964477..cfcdf89 100644
--- a/quick_start/more.rst
+++ b/quick_start/more.rst
@@ -1,7 +1,7 @@
 .. _more:
 
-Writing Your Own Module
-========================
+Example Part #1: Productionising a Classifier
+=============================================
 
 So far, we have built and published a Python module with a single method on it, ``numChars``, and built a workflow which connects our method to an HTTP endpoint. This in itself isn't particularly useful, so, now that you've got the gist of how NStack works, it's time to build something more realistic!
@@ -14,7 +14,7 @@ Field Name Description Type
 ================ ======================= ===========
-``species`` The species of iris Text
+``species``        The species of iris     Text
 
 ``sepal_width`` The width of the sepal Double
@@ -27,8 +27,8 @@ If we are trying to find the species based on the sepal and petal measurements, this means these measurements are going to be the input to our classifier module, with text being the output. 
This means we need to write a method in Python which takes four ``Double``\s and returns ``Text``. -Creating your module -******************* +Creating your classifier module +****************************** To begin, let's make a new directory called ``irisclassify``, ``cd`` into it, and initialise a new module: @@ -105,8 +105,8 @@ Now we can write our ``predict`` method. The second argument, ``inputArr``, is t results = self.rf.predict(df) return results.item() -Configuration -************* +Configuring your module +*********************** When your module is started, it is run in a Linux container on the NStack server. Because our module uses libraries like ``pandas`` and ``sklearn``, we have to tell NStack to install some extra operating system libraries inside your module's container. NStack lets us specify these in our ``nstack.yaml``` configuration file in the ``packages`` section. Let's add the following packages: @@ -132,7 +132,7 @@ Now we're ready to build and publish our classifier. Remember, even though we ru Building NStack Container module irisclassify. Please wait. This may take some time. Module irisclassify built successfully. Use `nstack list methods` to see all available methods. -We can see our method, ``irisclassify.predict``. Including our ``demo.numChars`` method from the previous tutorial, we should now have two: +We can now see ``irisclassify.predict`` in the list of existing methods (along with previously built methods like demo.numChars) by running the suggested command nstack list methods .. code :: bash @@ -151,127 +151,11 @@ This creates an HTTP endpoint on ``http://localhost:8080/irisendpoint`` which ca .. code :: bash ~/irisclassify/ $ curl -X PUT -d '{ "params" : [4.7, 1.4, 6.1, 2.9] }' localhost:8080/irisendpoint - Success + Msg Accepted ~/irisclassify/ $ nstack log 2 Feb 17 10:32:30 nostromo nstack-server[8925]: OUTPUT: "Iris-versicolor" -Great! Our classifiier is now productionised. - -Other Sources and Sinks -*********************** - -So far we have used HTTP as a source, and the log as a sink, but NStack supports many other integrations. For instance, we can connect our classifier to use a database as a source and/or a sink by using the ``postgresql`` intergration: - -.. code :: bash - - ~/irisclassify/ $ nstack start "source(postgresql://foo:bar@database.contoso.com/flowers?query=SELECT%20*%20FROM%20iris : (Double, Double, Double, Double) | irisclassify.predict | sink(postgresql://foo:bar@database.contoso.com?table=flowers : Text)" - -In this case, NStack will ensure that the database is of the correct schema. - -.. note :: See all available integrations at :ref:`Supported Integrations ` - -More Powerful Workflows -*********************** - -So far, we've composed workflows out of a source, a sink, and a single method, but workflows can contain as many steps as you like, as long as the output type of one matches the input type of the other. For instance, let's add our ``demo.numChars`` method from the previous tutorial to our workflow. From listing the available methods above, we see that it takes ``Text`` and returns ``Integer``. Because our ``irisclassify.predict`` method returns ``Text``, this means we can connect -- or `compose` -- them together. - -.. note :: ``numChars`` and ``predict`` can be `composed` together because their types -- or schemas -- match. If ``predict`` wasn't configured to output ``Text``, or ``numChars`` wasn't configured to take ``Text`` as input, NStack would not let you build the following workflow. - -.. 
code :: bash - - ~/irisclassify/ $ nstack start "sources.http (Double, Double, Double, Double) { http_path = "/irisendpoint" } | irisclassify.predict | demo.numChars | sink : Integer" - -Although you can write workflows directly in the ``start`` command, as we have above, NStack provides a more powerful way to build workflows that allows them to be re-used, shared, and composed together. -All of the workflows that are started with the ``start`` command have to be `fully composed`, which means that they contain a source, one or more modules, and a sink. Many times, you may want to write a workflow which is only `partially composed`; for instance, it contains only modules, is a combination of a source and a module, or is a combination of a module and a sink. These workflows cannot be run by themselves, but can be shared and attached to other sources, sinks, or modules when they are started. - -For instance, we could combine ``irisclassify.predict`` and ``demo.numChars`` to form a new workflow ``speciesLength`` like so: - -.. code :: java - - def speciesLength = irisclassify.predict | demo.numChars - -To build workflows like this, we create them as modules in a similar way we created a Python module -- with ``init``. Let's create a new directory called ``irisworkflow``, ``cd`` into it, and create a new workflow module. - -.. code :: bash - - ~/ $ mkdir irisworkflow; cd irisworkflow - ~/irisworkflow/ $ nstack init workflow - Workflow module 'irisworkflow' successfully initialised at /var/home/fedora/irisworkflow - -Instead of creating an ``nstack.yaml``, this creates a single file, ``workflow.nml``, which contains our workflow module. - -.. code :: java - - module irisworkflow { - // A sample workflow - def w = sources.http : Text { http_path = "/s" } | Module1.numChars | sinks.log : Integer - } - -You will notice that the module itself is named ``irisworkflow`` after the directory name, and has an example workflow in it, ``w``. We're going to replace this with our ``speciesLength`` workflow above. - -.. code :: java - - module irisworkflow { - // A sample workflow - def speciesLength = irisclassify.predict | demo.numChars - } - -As with others modules, we can now build ``irisworkflow`` with the ``build`` command: - -.. code :: bash - - ~/irisworkflow/ $ nstack build - Building NStack Workflow module irisworkflow. - Workflow module irisworkflow built successfully. - -Because our workflow ``irisworkflow.speciesLength`` has not been connected to a source or a sink, is is technically a method and is treated as such. This means we can see it in alongside our other methods: - -.. code :: bash - - ~/irisworkflow/ $ nstack list methods - irisclassify.predict : (Double, Double, Double, Double) -> Text - demo.numChars : Text -> Integer - irisworkflow.speciesLength : (Double, Double, Double, Double) -> Integer - -Note that the input type of the workflow is the input type of ``irisclassify.predict``, and the output type is the output type of ``demo.numChars``. Like other methods, this can be connected to a source and a sink to make it `fully composed`: - -.. code :: bash - - ~/irisworkflow/ $ nstack start 'src.http : (Double, Double, Double, Double) { http_path = "speciesLength" } | irisworkflow.speciesLength | sink.log : Integer' - -Alternatively, you can move the source and sink into the ``workflow.nml`` file: - -.. 
code :: java - - module irisworkflow { - def completeWorkflow = src.http : (Double, Double, Double, Double) { http_path = "speciesLength" } | irisworkflow.speciesLength | sink.log : Integer; - } - -If you ``build`` this, you can then start it by itself with the ``start`` command, because it's a fully composed: - -.. code :: bash - - ~/irisworkflow/ $ nstack start irisworkflow.completeWorkflow - -This paradigm can be helpful when we apply it to sources and sinks. Oftentimes, you -- or someone else in your company -- will want to create sources and sinks which are combined with modules, for instance in the following fictional example: - -.. code :: java - - module customerRecords { - def cleanSource = source.postgres { postgres_username = "foo, postgres_password = "bar" @database.contoso.com/customers?query=SELECT * FROM customer_records : CustomerRecord) | DataTools.cleanCustomerRecord; - def cleanSink = DataTools.ensureValidCustomer | sink(postgresql://foo:bar@database.contoso.com/customers?table=customer_records : CustomerRecord); - } - -Preconfigured sources and sinks can be used in workflows without requiring the user to be familiar with the configuration of the source and sink. -This becomes useful when you are connecting to more complex middleware (such as streams and message queues), which those building modules and workflows may not need to understand or want to configure. Additionally, it allows sources and sinks to be created securely, without the need to share credentials with those building workflows. The user will simply recieve a stream of ``CustomerRecord``, or be able to output a ``CustomerRecord``. -In this example, we are also adding a module to each to do some processing before and after. - -NStack knows that ``cleanSource`` is still a source because is doesn't have a sink attached. Similarly, NStack knows that ``cleanSink`` is a sink, because it doesn't have a source. This means you can find them in your list of sources and sinks using ``list``, and they can be used like any other source and sink, for instance: - -.. code :: bash - - ~/ $ nstack start "customerRecords.cleanSource | customerClassifier.predict | customerRecords.cleanSink" - +Great! Our classifier is now productionised. Next, we're going to connect our classifier to a database, and explore some of the more sophisticated workflows you can build using NStack. diff --git a/quick_start/workflow_power.rst b/quick_start/workflow_power.rst new file mode 100644 index 0000000..b3ded0b --- /dev/null +++ b/quick_start/workflow_power.rst @@ -0,0 +1,155 @@ +.. _workflow_power: + +Example Part #2 : More Powerful Workflows +***************************************** + +Workflows as modules +==================== + +In the previous examples, we composed workflows directly on the command line using the following format: + +.. code :: bash + + $ nstack start "source | module.function | sink" + + +This is useful for testing, but is limited in a few ways: + +- Workflows can't be shared or versioned +- Writing long configuration parameters on the command line is tedious +- It doesn't allow workflows to be composed together (more on this next) + +One of NStack's most powerful features is that it supports creating workflow `module`\s, which can host one or more workflows. When you write workflow modules, workflows can be composed together, combined, versioned, updated, and shared -- just like functions. In fact, a workflow `is` a function on NStack. 
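+
+For instance -- previewing the ``speciesLength`` workflow we define later in this tutorial, and reusing the ``demo.numChars`` function and the ``sources.http``/``sinks.log`` syntax from the earlier examples (the name ``full`` is purely illustrative) -- a workflow built out of functions is itself a function, and can be attached to a source and a sink like any other:
+
+.. code :: java
+
+   module irisworkflow {
+      // two functions composed together: itself a function of type (Double, Double, Double, Double) -> Integer
+      def speciesLength = irisclassify.predict | demo.numChars;
+      // ...which can be reused and fully composed like any other function
+      def full = sources.http : (Double, Double, Double, Double) { http_path = "/irisendpoint" } | speciesLength | sinks.log : Integer;
+   }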
+
+To create a workflow module, we create a new directory and use the ``init`` command.
+
+.. code :: bash
+
+   ~/ $ mkdir irisworkflow; cd irisworkflow
+   ~/irisworkflow/ $ nstack init workflow
+   Workflow module 'irisworkflow' successfully initialised at /var/home/fedora/irisworkflow
+
+NStack creates a single file, ``workflow.nml``, which contains a sample workflow.
+
+.. code :: java
+
+   module irisworkflow {
+      def w = sources.http : Text { http_path = "/foo" } | Module1.numChars | sinks.log : Integer;
+   }
+
+In this sample, we have a module called ``irisworkflow`` which has a single workflow on it, ``w``. If we replace ``Module1.numChars`` with the function from our previous tutorial, ``demo.numChars``, we can then build this workflow with ``nstack build``.
+
+As with other modules, we can now build ``irisworkflow`` with the ``build`` command:
+
+.. code :: bash
+
+   ~/irisworkflow/ $ nstack build
+   Building NStack Workflow module irisworkflow.
+   Workflow module irisworkflow built successfully.
+
+And now start this directly with the start command:
+
+.. code :: bash
+
+   ~/irisworkflow/ $ nstack start irisworkflow.w
+
+Multiple Steps
+==============
+
+Workflows can contain as many steps as you like, as long as the output type of one matches the input type of the other. For instance, let's add our ``demo.numChars`` method from the previous tutorial to our workflow. From listing the available methods above, we see that it takes ``Text`` and returns ``Integer``. Because our ``irisclassify.predict`` method returns ``Text``, this means we can connect -- or `compose` -- them together.
+
+.. note :: ``numChars`` and ``predict`` can be `composed` together because their types -- or schemas -- match. If ``predict`` wasn't configured to output ``Text``, or ``numChars`` wasn't configured to take ``Text`` as input, NStack would not let you build the following workflow.
+
+.. code :: java
+
+   module irisworkflow {
+      def multipleSteps = sources.http : (Double, Double, Double, Double) { http_path = "/irisendpoint" } | irisclassify.predict | demo.numChars | sink : Integer;
+   }
+
+
+Partial Workflows
+================
+
+All of the workflows that we have written so far have been `fully composed`, which means that they contain a source, one or more functions, and a sink. Many times, you may want to split up source, sinks, and functions into separate pieces you can share and reuse. In this case, we say that a workflow is `partially composed`, which just means it does not contain a source, one or more functions, and a sink. These workflows cannot be ``start``\ed by themselves, but can be shared and attached to other sources, sinks, or modules when they are started.
+
+For instance, we could combine ``irisclassify.predict`` and ``demo.numChars`` to form a new workflow ``speciesLength`` like so:
+
+.. code :: java
+
+   module irisworkflow {
+      // Our speciesLength workflow
+      def speciesLength = irisclassify.predict | demo.numChars
+   }
+
+Because our workflow ``irisworkflow.speciesLength`` has not been connected to a source or a sink, it is technically a function. This means we can see it alongside our other functions:
+
+.. 
code :: bash
+
+   ~/irisworkflow/ $ nstack list functions
+   irisclassify.predict : (Double, Double, Double, Double) -> Text
+   demo.numChars : Text -> Integer
+   irisworkflow.speciesLength : (Double, Double, Double, Double) -> Integer
+
+Note that the input type of the workflow is the input type of ``irisclassify.predict``, and the output type is the output type of ``demo.numChars``. Like other functions, this must be connected to a source and a sink to make it `fully composed` so it can be started:
+
+.. code :: bash
+
+   ~/irisworkflow/ $ nstack start 'sources.http : (Double, Double, Double, Double) { http_path = "/speciesLength" } | irisworkflow.speciesLength | sinks.log : Integer'
+
+Oftentimes you want to re-use a source or a sink without reconfiguring them. To do this, we can similarly separate the sources and sinks into separate workflows, like so:
+
+.. code :: java
+
+   module irisworkflow {
+      def httpEndpoint = sources.http : (Double, Double, Double, Double) { http_path = "/speciesLength" };
+      def logSink = sinks.log : Text;
+      def speciesWf = httpEndpoint | irisclassify.predict | logSink;
+   }
+
+Using a database as a source
+***************************
+
+Separating sources and sinks becomes useful when you're connecting to more complex middleware which you don't want to configure each time you use it -- many times you want to reuse a source or sink in multiple workflows. So far we have used HTTP as a source, and the log as a sink, but NStack supports many other integrations.
+
+.. code :: java
+
+   module irisDatabases {
+      def petalsAndSepals = sources.postgres : (Double, Double, Double, Double) {
+        pg_database = "flowers",
+        pg_query = "SELECT * FROM iris"
+      };
+
+      def irisSpecies = sinks.postgres : Text {
+        pg_database = "flowers",
+        pg_table = "iris"
+      };
+   }
+
+.. note :: See all available integrations at :ref:`Supported Integrations `
+
+If we built this module, ``irisDatabases.petalsAndSepals`` and ``irisDatabases.irisSpecies`` could be used by other modules as sources and sinks.
+
+We may also want to add a module to do some pre- or post-processing to a source or sink. For instance:
+
+.. code :: java
+
+   module irisDatabases {
+      def petalsAndSepals = sources.postgres : (Double, Double, Double, Double) {
+        pg_database = "flowers",
+        pg_query = "SELECT * FROM iris"
+      };
+
+      def irisSpecies = sinks.postgres : Text {
+        pg_database = "flowers",
+        pg_table = "iris"
+      };
+
+      def roundedPetalsSource = petalsAndSepals | PetalsTools.roundPetalLengths;
+      def irisSpeciesUppercase = TextTools.toUppercase | irisSpecies;
+   }
+
+Because ``roundedPetalsSource`` is a combination of a source and a function, it is still a valid source. Similarly, ``irisSpeciesUppercase`` is a combination of a function and a sink, so it is still a valid sink.
+
+.. note :: Composition rules: a function combined with another function is still a function, a source combined with a function is still a source, and a function combined with a sink is still a sink.
+
+This means you can find them in your list of sources and sinks using ``list``. 
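+
+To make these composition rules concrete, here is a sketch (using the same illustrative names, and assuming ``PetalsTools.roundPetalLengths`` maps the four-``Double`` tuple to itself and ``TextTools.toUppercase`` maps ``Text`` to ``Text``, as the surrounding text suggests) of the pre-processed source and sink being reused in a new, fully composed workflow:
+
+.. code :: java
+
+   module irisPipeline {
+      // source | function is still a source; function | sink is still a sink,
+      // so attaching our classifier in the middle yields a fully composed workflow
+      def dbToDb = irisDatabases.roundedPetalsSource | irisclassify.predict | irisDatabases.irisSpeciesUppercase;
+   }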
From 6b1e7da29d9cc7c0b5305fe49d9d5ae198ce3fe1 Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Thu, 9 Mar 2017 11:03:52 +0000
Subject: [PATCH 05/12] Fixed some typos, titles formatting

---
 quick_start/more.rst           |  2 +-
 quick_start/workflow_power.rst | 34 ++++++++++++++++++----------------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/quick_start/more.rst b/quick_start/more.rst
index cfcdf89..02c4bfe 100644
--- a/quick_start/more.rst
+++ b/quick_start/more.rst
@@ -121,7 +121,7 @@ Additionally, we want to tell NStack to copy our ``train.csv`` file into our mod
 
    files: ['train.csv']
 
-Publishing and Starting
+Publishing and starting
 ***********************
 
 Now we're ready to build and publish our classifier. Remember, even though we ru

diff --git a/quick_start/workflow_power.rst b/quick_start/workflow_power.rst
index b3ded0b..2618d03 100644
--- a/quick_start/workflow_power.rst
+++ b/quick_start/workflow_power.rst
@@ -1,8 +1,10 @@
 .. _workflow_power:
 
-Example Part #2 : More Powerful Workflows
+Example Part #2: More Powerful Workflows
 *****************************************
 
+Now that we have published our iris classifier to NStack as a module, we can use it to demonstrate some of the more powerful features of the workflow engine.
+
 Workflows as modules
 ====================
 
@@ -39,7 +41,6 @@ NStack creates a single file, ``workflow.nml``, which contains a sample workflow
 
 In this sample, we have a module called ``irisworkflow`` which has a single workflow on it, ``w``. If we replace ``Module1.numChars`` with the function from our previous tutorial, ``demo.numChars``, we can then build this workflow with ``nstack build``.
 
-As with other modules, we can now build ``irisworkflow`` with the ``build`` command:
 
 .. code :: bash
 
@@ -53,26 +54,32 @@ And now start this directly with the start command:
 
    ~/irisworkflow/ $ nstack start irisworkflow.w
 
-Multiple Steps
+Multiple steps
 ==============
 
-Workflows can contain as many steps as you like, as long as the output type of one matches the input type of the other. For instance, let's add our ``demo.numChars`` method from the previous tutorial to our workflow. From listing the available methods above, we see that it takes ``Text`` and returns ``Integer``. Because our ``irisclassify.predict`` method returns ``Text``, this means we can connect -- or `compose` -- them together.
+Workflows can contain as many steps as you like, as long as the output type of one matches the input type of the other. For instance, let's say we wanted to create the following workflow:
+
+- Expose an HTTP endpoint which takes four ``Double``\s
+- Send these ``Double``\s to our classifier, ``irisclassify``, which will tell us the species of the iris
+- Count the number of characters in the species of the iris
+- Write the result to the log
+
+We could write the following workflow:
+
+.. 
code :: java

   module irisworkflow {
      def multipleSteps = sources.http : (Double, Double, Double, Double) { http_path = "/irisendpoint" } | irisclassify.predict | demo.numChars | sinks.log : Integer;
   }

.. note :: ``numChars`` and ``predict`` can be `composed` together because their types -- or schemas -- match. If ``predict`` wasn't configured to output ``Text``, or ``numChars`` wasn't configured to take ``Text`` as input, NStack would not let you build this workflow.

Partial workflows
================

All of the workflows that we have written so far have been `fully composed`, which means that they contain a source, one or more functions, and a sink. Many times, you want to split up sources, sinks, and functions into separate pieces you can share and reuse. In this case, we say that a workflow is `partially composed`, which just means it does not contain a source, one or more functions, and a sink. These workflows cannot be ``start``\ed by themselves, but can be shared and attached to other sources, sinks, or functions to become `fully composed`.

For instance, we could combine ``irisclassify.predict`` and ``demo.numChars`` from the previous example to form a new workflow ``speciesLength`` like so:

.. code :: java

   module irisworkflow {
      // Our speciesLength workflow
      def speciesLength = irisclassify.predict | demo.numChars
   }

Because our workflow ``irisworkflow.speciesLength`` has not been connected to a source or a sink, it is technically a function. If we build this workflow, we can see ``speciesLength`` alongside our other functions by using the ``list`` command:

.. code :: bash

   ~/irisworkflow/ $ nstack list functions
   irisclassify.predict : (Double, Double, Double, Double) -> Text
   demo.numChars : Text -> Integer
   irisworkflow.speciesLength : (Double, Double, Double, Double) -> Integer

As we would expect, the input type of the workflow is the input type of ``irisclassify.predict``, and the output type is the output type of ``demo.numChars``. Like other functions, this must be connected to a source and a sink to make it `fully composed` -- and thus something that can be started with the ``start`` command:

.. 
code :: bash

   ~/irisworkflow/ $ nstack start 'sources.http : (Double, Double, Double, Double) { http_path = "/speciesLength" } | irisworkflow.speciesLength | sinks.log : Integer'

Oftentimes you want to re-use a source or a sink without reconfiguring them. To do this, we can similarly separate the sources and sinks into separate workflows, like so:

.. code :: java

   module irisworkflow {
      def httpEndpoint = sources.http : (Double, Double, Double, Double) { http_path = "/speciesLength" };
      def logSink = sinks.log : Text;
      def speciesWf = httpEndpoint | irisclassify.predict | logSink;
   }

-Using a database as a source
-***************************
-
 Separating sources and sinks becomes useful when you're connecting to more complex middleware which you don't want to configure each time you use it -- many times you want to reuse a source or sink in multiple workflows. So far we have used HTTP as a source, and the log as a sink, but NStack supports many other integrations.

.. code :: java

   module irisDatabases {
      def petalsAndSepals = sources.postgres : (Double, Double, Double, Double) {
        pg_database = "flowers",
        pg_query = "SELECT * FROM iris"
      };

      def irisSpecies = sinks.postgres : Text {
        pg_database = "flowers",
        pg_table = "iris"
      };
   }

-.. note :: See all available integrations at :ref:`Supported Integrations `
-
 If we built this module, ``irisDatabases.petalsAndSepals`` and ``irisDatabases.irisSpecies`` could be used by other modules as sources and sinks.

From f8700f96a13a492d119cce08a27733a3255cf9d0 Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Thu, 16 Mar 2017 11:03:43 +0000
Subject: [PATCH 06/12] Fixed line type

---
 quick_start/workflow_power.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quick_start/workflow_power.rst b/quick_start/workflow_power.rst
index 2618d03..7793707 100644
--- a/quick_start/workflow_power.rst
+++ b/quick_start/workflow_power.rst
@@ -1,7 +1,7 @@
 .. _workflow_power:
 
 Example Part #2: More Powerful Workflows
-*****************************************
+========================================

From 4d11e75465fb1640b4b31bcb72fb7670803b0dec Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Thu, 23 Mar 2017 14:48:56 +0000
Subject: [PATCH 07/12] Methods -> Functions, and reorganisation

---
 advanced_start/index.rst                 | 15 ++++++++++++
 {quick_start => advanced_start}/more.rst | 24 +++++++++----------
 .../workflow_power.rst                   | 10 ++++----
 index.rst                                |  1 +
 quick_start/index.rst                    |  6 ++---
 5 files changed, 35 insertions(+), 21 deletions(-)
 create mode 100644 advanced_start/index.rst
 rename {quick_start => advanced_start}/more.rst (77%)
 rename {quick_start => advanced_start}/workflow_power.rst (98%)

diff --git a/advanced_start/index.rst b/advanced_start/index.rst
new file mode 100644
index 0000000..96c1a1d
--- /dev/null
+++ b/advanced_start/index.rst
@@ -0,0 +1,15 @@
+.. _advanced_start_index:
+
+******************
+Advanced Tutorial
+******************
+
+In this section, we're going to productionise a Random Forest classifier written with `sklearn`, deploy it to the cloud, and use it in a more sophisticated workflow.
+
+By the end of the tutorial, you will learn how to build modules with dependencies, write more sophisticated workflows, and build abstractions over data-sources. Enjoy!
+
+
+.. toctree::
+
+   more
+   workflow_power

diff --git a/quick_start/more.rst b/advanced_start/more.rst
similarity index 77%
rename from quick_start/more.rst
rename to advanced_start/more.rst
index 02c4bfe..91e96a3 100644
--- a/quick_start/more.rst
+++ b/advanced_start/more.rst
@@ -1,9 +1,9 @@
 .. 
_more:
 
-Example Part #1: Productionising a Classifier
-=============================================
+Productionising a Classifier as an NStack Module
+================================================
 
-So far, we have built and published a Python module with a single method on it, ``numChars``, and built a workflow which connects our method to an HTTP endpoint. This in itself isn't particularly useful, so, now that you've got the gist of how NStack works, it's time to build something more realistic!
+So far, we have built and published a Python module with a single function on it, ``numChars``, and built a workflow which connects our function to an HTTP endpoint. This in itself isn't particularly useful, so, now that you've got the gist of how NStack works, it's time to build something more realistic!
 
 In this tutorial, we're going to create and productionise a simple classifier which uses the famous `iris dataset `_.
@@ -25,7 +25,7 @@ Field Name Description Type
 ``petal_length`` The length of the petal Double
 ================ ======================= ===========
 
-If we are trying to find the species based on the sepal and petal measurements, this means these measurements are going to be the input to our classifier module, with text being the output. This means we need to write a method in Python which takes four ``Double``\s and returns ``Text``.
+If we are trying to find the species based on the sepal and petal measurements, this means these measurements are going to be the input to our classifier module, with text being the output. This means we need to write a function in Python which takes four ``Double``\s and returns ``Text``.
 
 Creating your classifier module
 ******************************
@@ -48,7 +48,7 @@ Next, let's download our training data into this so we can use it in our module.
 
 Defining our API
 ****************
 
-As we know what the input and output of our classifier are going to look like, let's edit the ``api`` section of ``nstack.yaml`` to define our API (i.e. the entry-point into our module). By default a new module contains a sample method ``numChars``, which we can replace with our definition. We're going to call the method we write in Python ``predict``, which means we can fill in the ``api`` section of ``nstack.yaml`` as follows:
+As we know what the input and output of our classifier are going to look like, let's edit the ``api`` section of ``nstack.yaml`` to define our API (i.e. the entry-point into our module). By default a new module contains a sample function ``numChars``, which we can replace with our definition. We're going to call the function we write in Python ``predict``, which means we can fill in the ``api`` section of ``nstack.yaml`` as follows:
 
-This means we are exposing a single method ``predict``, which takes a record of four ``Double``\s (the measurements) and returns ``Text`` (the iris species).
+This means we are exposing a single function ``predict``, which takes a record of four ``Double``\s (the measurements) and returns ``Text`` (the iris species).
 
 Writing our classifier
 **********************
 
 Now that we've defined our API, let's jump into our Python module, which lives in ``service.py``.
-We see that NStack has created a class ``Service``. This is where we add the methods for our module. Right now it also has a sample method in it, ``numChars``, which we can remove. 
+We see that NStack has created a class ``Service``. This is where we add the functions for our module. Right now it also has a sample function in it, ``numChars``, which we can remove.
 
 Firstly, let's import the libaries we're using.
@@ -78,7 +78,7 @@ Firstly, let's import the libaries we're using.
 
 .. note :: Python modules must also import ``nstack``
 
-Before we add our ``predict`` method, we're going to add ``__init__``, the Python contructor method which runs upon the creation of our module. It's going to load our data from ``train.csv``, and use it to train our Random Forest classifier:
+Before we add our ``predict`` function, we're going to add ``__init__``, the Python contructor function which runs upon the creation of our module. It's going to load our data from ``train.csv``, and use it to train our Random Forest classifier:
@@ -94,7 +94,7 @@ Before we add our ``predict`` function, we're going to add ``__init__``, the Pytho
         rf.fit(trainArr, trainRes)
         self.rf = rf
 
-Now we can write our ``predict`` method. The second argument, ``inputArr``, is the input -- in this case, our four ``Double``\s. To return text, we simply return from the method in Python.
+Now we can write our ``predict`` function. The second argument, ``inputArr``, is the input -- in this case, our four ``Double``\s. To return text, we simply return from the function in Python.
@@ -130,13 +130,13 @@ Now we're ready to build and publish our classifier. Remember, even though we ru
 
     ~/irisclassify/ $ nstack build
     Building NStack Container module irisclassify. Please wait. This may take some time.
-    Module irisclassify built successfully. Use `nstack list methods` to see all available methods.
+    Module irisclassify built successfully. Use `nstack list functions` to see all available functions.
 
-We can now see ``irisclassify.predict`` in the list of existing methods (along with previously built methods like demo.numChars) by running the suggested command nstack list methods
+We can now see ``irisclassify.predict`` in the list of existing functions (along with previously built functions like demo.numChars) by running the suggested command nstack list functions
 
 .. code :: bash
 
-    ~/irisclassify/ $ nstack list methods
+    ~/irisclassify/ $ nstack list functions
     irisclassify.predict : (Double, Double, Double, Double) -> Text
     demo.numChars : Text -> Integer
 
diff --git a/quick_start/workflow_power.rst b/advanced_start/workflow_power.rst
similarity index 98%
rename from quick_start/workflow_power.rst
rename to advanced_start/workflow_power.rst
index 7793707..cd4b2e7 100644
--- a/quick_start/workflow_power.rst
+++ b/advanced_start/workflow_power.rst
@@ -1,12 +1,12 @@
 .. _workflow_power:
 
-Example Part #2: More Powerful Workflows
-========================================
+More Powerful Workflows
+=======================
 
 Now that we have published our iris classifier to NStack as a module, we can use it to demonstrate some of the more powerful features of the workflow engine.
 
 Workflows as modules
-====================
+********************
 
 In the previous examples, we composed workflows directly on the command line using the following format:
@@ -55,7 +55,7 @@ And now start this directly with the start command:
 
     ~/irisworkflow/ $ nstack start irisworkflow.w
 
 Multiple steps
-==============
+**************
 
 Workflows can contain as many steps as you like, as long as the output type of one matches the input type of the other. For instance, let's say we wanted to create the following workflow:
@@ -75,7 +75,7 @@ We could write the following workflow:
 
 .. note :: ``numChars`` and ``predict`` can be `composed` together because their types -- or schemas -- match. If ``predict`` wasn't configured to output ``Text``, or ``numChars`` wasn't configured to take ``Text`` as input, NStack would not let you build this workflow.
 
 Partial workflows
-================
+*****************
 
 All of the workflows that we have written so far have been `fully composed`, which means that they contain a source, one or more functions, and a sink. Many times, you want to split up sources, sinks, and functions into separate pieces you can share and reuse. In this case, we say that a workflow is `partially composed`, which just means it does not contain a source, one or more functions, and a sink. These workflows cannot be ``start``\ed by themselves, but can be shared and attached to other sources, sinks, or functions to become `fully composed`.
diff --git a/index.rst b/index.rst
index 4240f8c..0835442 100644
--- a/index.rst
+++ b/index.rst
@@ -15,6 +15,7 @@ Welcome to the NStack Documentation!
    concepts
    installation
    quick_start/index
+   advanced_start/index
    architecture
    reference/index
diff --git a/quick_start/index.rst b/quick_start/index.rst
index b53efb1..f70afc9 100644
--- a/quick_start/index.rst
+++ b/quick_start/index.rst
@@ -1,7 +1,7 @@
 .. _quick_start_index:
 
 ******************
-Quick Start
+Quick Tutorial
 ******************
 
 In this section, we're going to see how to build up a simple NStack module, deploy it to the cloud, and use it in a workflow by connecting it to a `source` and a `sink`.
@@ -10,11 +10,9 @@ By the end of the tutorial, you will learn how to publish your code to NStack an
 
 .. note:: To learn more about modules, sources, and sinks, read :ref:`Concepts`
 
-Make sure you have :doc:`installed NStack ` and let's get going.
+Make sure you have :doc:`installed NStack `.
 
 .. toctree::
 
    module
    workflow
-   more
-   workflow_power

From aa8226d94eba1d23888be38060b1c815e7f4c33b Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Thu, 23 Mar 2017 16:52:00 +0000
Subject: [PATCH 08/12] JB changes

---
 advanced_start/index.rst          |  2 +-
 advanced_start/more.rst           | 12 ++++++------
 advanced_start/workflow_power.rst |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/advanced_start/index.rst b/advanced_start/index.rst
index 96c1a1d..ce904a5 100644
--- a/advanced_start/index.rst
+++ b/advanced_start/index.rst
@@ -4,7 +4,7 @@
 Advanced Tutorial
 ******************
 
-In this section, we're going to productionise a Random Forest classifier written with `sklearn`, deploy it to the cloud, and use it in a more sophisticated workflow.
+In this section, we're going to productionise a Random Forest classifier written with `sklearn `_, deploy it to the cloud, and use it in a more sophisticated workflow.
 
diff --git a/advanced_start/more.rst b/advanced_start/more.rst
index 91e96a3..739e57a 100644
--- a/advanced_start/more.rst
+++ b/advanced_start/more.rst
@@ -38,7 +38,7 @@ To begin, let's make a new directory called ``irisclassify``, ``cd`` into it, an
 
     ~/irisclassify/ $ nstack init python
     python module 'irisclassify' successfully initialised at ~/irisclassify
 
-Next, let's download our training data into this so we can use it in our module.
+Next, let's download our training data so we can use it in our module.
 
 .. code:: bash
 
     ~/irisclassify/ $ curl -O https://raw.githubusercontent.com/nstackcom/nstack-examples/master/iris/irisclassify/train.csv
 
@@ -48,7 +48,7 @@ Next, let's download our training data so we can use it in our module.
 
 Defining our API
 ****************
 
-As we know what the input and output of our classifier is going to look like, let's edit the ``api`` section of ``nstack.yaml`` to define our API (i.e. the entry-point into our module). By default a new module contains a sample function ``numChars``, which we can replace with our definition. We're going to call the function we write in Python ``predict``, which means we can fill in the ``api`` section of ``nstack.yaml`` as follows:
+As we know what the input and output of our classifier is going to look like, let's edit the ``api`` section of ``nstack.yaml`` to define our API (i.e. the entry-point into our module). By default, a new module contains a sample function ``numChars``, which we replace with our definition. We're going to call the function we write in Python ``predict``, which means we fill in the ``api`` section of ``nstack.yaml`` as follows:
@@ -78,7 +78,7 @@ Firstly, let's import the libaries we're using.
 
 .. note :: Python modules must also import ``nstack``
 
-Before we add our ``predict`` function, we're going to add ``__init__``, the Python contructor function which runs upon the creation of our module. It's going to load our data from ``train.csv``, and use it to train our Random Forest classifier:
+Before we add our ``predict`` function, we're going to add ``__init__``, the Python constructor function which runs upon the creation of our module. It's going to load our data from ``train.csv``, and use it to train our Random Forest classifier:
@@ -112,7 +112,7 @@ When your module is started, it is run in a Linux container on the NStack server
 
 .. code :: yaml
 
-    packages: ['numpy', 'python3-scikit-learn.x86_64', 'scipy', 'python3-scikit-image.x86_64', 'python3-pandas.x86_64']
+    packages: ['numpy', 'python3-scikit-learn', 'scipy', 'python3-scikit-image', 'python3-pandas']
@@ -132,7 +132,7 @@ Now we're ready to build and publish our classifier. Remember, even though we ru
     Building NStack Container module irisclassify. Please wait. This may take some time.
     Module irisclassify built successfully. Use `nstack list functions` to see all available functions.
 
-We can now see ``irisclassify.predict`` in the list of existing functions (along with previously built functions like demo.numChars) by running the suggested command nstack list functions
+We can now see ``irisclassify.predict`` in the list of existing functions (along with previously built functions like `demo.numChars`) by running the suggested command
@@ -140,7 +140,7 @@ We can now see ``irisclassify.predict`` in the list of existing functions (along
     irisclassify.predict : (Double, Double, Double, Double) -> Text
     demo.numChars : Text -> Integer
 
-Our classifier is now published, but to use it we need to connect it to an event-source and sink. In the previous tutorial, we used HTTP as a source, and the NStack log as a sink.
+Our classifier is now published, but to use it we need to connect it to an event source and sink. In the previous tutorial, we used HTTP as a source, and the NStack log as a sink.
 We can do the same here by starting the following workflow.
 
 .. code :: bash
diff --git a/advanced_start/workflow_power.rst b/advanced_start/workflow_power.rst
index cd4b2e7..bb8a1f8 100644
--- a/advanced_start/workflow_power.rst
+++ b/advanced_start/workflow_power.rst
@@ -68,7 +68,7 @@ We could write the following workflow:
 
 .. code :: bash
 
-    module irsiworkflow {
+    module irisworkflow {
       def multipleSteps = sources.http (Double, Double, Double, Double) { http_path = "/irisendpoint" } | irisclassify.predict | demo.numChars | sinks.log : Integer;
     }

From 3016d6af58d4451f8d793dfc4a4e7749352abe66 Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Thu, 23 Mar 2017 17:37:56 +0000
Subject: [PATCH 09/12] remove functions from fully composed workflows

---
 advanced_start/workflow_power.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/advanced_start/workflow_power.rst b/advanced_start/workflow_power.rst
index bb8a1f8..ee9ba8b 100644
--- a/advanced_start/workflow_power.rst
+++ b/advanced_start/workflow_power.rst
@@ -77,7 +77,7 @@ We could write the following workflow:
 Partial workflows
 *****************
 
-All of the workflows that we have written so far have been `fully composed`, which means that they contain a source, one or more functions, and a sink. Many times, you want to split up sources, sinks, and functions into separate pieces you can share and reuse. In this case, we say that a workflow is `partially composed`, which just means it does not contain a source, one or more functions, and a sink. These workflows cannot be ``start``\ed by themselves, but can be shared and attached to other sources, sinks, or functions to become `fully composed`.
+All of the workflows that we have written so far have been `fully composed`, which means that they contain a source and a sink. Many times, you want to split up sources, sinks, and functions into separate pieces you can share and reuse. In this case, we say that a workflow is `partially composed`, which just means it does not contain a source and a sink. These workflows cannot be ``start``\ed by themselves, but can be shared and attached to other sources and/or sinks to become `fully composed`.
 
 For instance, we could combine ``irisclassify.predict`` and ``demo.numChars`` from the previous example to form a new workflow ``speciesLength`` like so:

From 4875f67ee078995b6950743f6531b8d9f5181b2f Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Fri, 24 Mar 2017 19:20:51 +0000
Subject: [PATCH 10/12] Some updates to new UX

---
 advanced_start/more.rst | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/advanced_start/more.rst b/advanced_start/more.rst
index 739e57a..94d33d0 100644
--- a/advanced_start/more.rst
+++ b/advanced_start/more.rst
@@ -34,15 +34,15 @@ To begin, let's make a new directory called ``irisclassify``, ``cd`` into it, an
 
 .. code:: bash
 
-    ~/ $ mkdir irisclassify; cd irisclassify
-    ~/irisclassify/ $ nstack init python
-    python module 'irisclassify' successfully initialised at ~/irisclassify
+    ~/ $ mkdir Irisclassify; cd Irisclassify
+    ~/Irisclassify/ $ nstack init python
+    python module 'Irisclassify' successfully initialised at ~/Irisclassify
 
 Next, let's download our training data so we can use it in our module.
 
 .. code:: bash
 
-    ~/irisclassify/ $ curl -O https://raw.githubusercontent.com/nstackcom/nstack-examples/master/iris/irisclassify/train.csv
+    ~/Irisclassify/ $ curl -O https://raw.githubusercontent.com/nstackcom/nstack-examples/master/iris/irisclassify/train.csv
 
 Defining our API
@@ -53,9 +53,7 @@ As we know what the input and output of our classifier is going to look like, le
 
     api : |
-      interface Default {
         predict : (Double, Double, Double, Double) -> Text
-      }
 
 This means we are exposing a single function ``predict``, which takes a record of four ``Double``\s (the measurements) and returns ``Text`` (the iris species).
@@ -66,7 +64,6 @@ Writing our classifier
 
 Now that we've defined our API, let's jump into our Python module, which lives in ``service.py``.
 We see that NStack has created a class ``Service``. This is where we add the functions for our module. Right now it also has a sample function in it, ``numChars``, which we can remove.
-
 Firstly, let's import the libaries we're using.
 
 .. code :: python
@@ -120,7 +117,6 @@ Additionally, we want to tell NStack to copy our ``train.csv`` file into our mod
 
     files: ['train.csv']
 
-
 Publishing and starting
 ***********************
@@ -128,31 +124,31 @@ Now we're ready to build and publish our classifier. Remember, even though we ru
 
 .. code :: bash
 
-    ~/irisclassify/ $ nstack build
-    Building NStack Container module irisclassify. Please wait. This may take some time.
-    Module irisclassify built successfully. Use `nstack list functions` to see all available functions.
+    ~/Irisclassify/ $ nstack build
+    Building NStack Container module Irisclassify. Please wait. This may take some time.
+    Module Irisclassify built successfully. Use `nstack list functions` to see all available functions.
 
 We can now see ``irisclassify.predict`` in the list of existing functions (along with previously built functions like `demo.numChars`) by running the suggested command
 
 .. code :: bash
 
     ~/irisclassify/ $ nstack list functions
-    irisclassify.predict : (Double, Double, Double, Double) -> Text
-    demo.numChars : Text -> Integer
+    Irisclassify.predict : (Double, Double, Double, Double) -> Text
+    Demo.numChars : Text -> Integer
 
 Our classifier is now published, but to use it we need to connect it to an event source and sink. In the previous tutorial, we used HTTP as a source, and the NStack log as a sink. We can do the same here by starting the following workflow.
 
 .. code :: bash
 
-    ~/irisclassify/ $ nstack start "sources.http : (Double, Double, Double, Double) { http_path : "/irisendpoint" } | irisclassify.predict | sinks.log : Text"
+    ~/Irisclassify/ $ nstack start "sources.http<(Double, Double, Double, Double)> { http_path : "/irisendpoint" } | irisclassify.predict | sinks.log"
 
 This creates an HTTP endpoint on ``http://localhost:8080/irisendpoint`` which can receive four ``Double``\s, and writes the results to the log as ``Text``. We can test our classifier by sending it some of the sample data from ``train.csv``:
 
 .. code :: bash
 
-    ~/irisclassify/ $ curl -X PUT -d '{ "params" : [4.7, 1.4, 6.1, 2.9] }' localhost:8080/irisendpoint
+    ~/Irisclassify/ $ curl -X PUT -d '{ "params" : [4.7, 1.4, 6.1, 2.9] }' localhost:8080/irisendpoint
     Msg Accepted
-    ~/irisclassify/ $ nstack log 2
+    ~/Irisclassify/ $ nstack log 2
     Feb 17 10:32:30 nostromo nstack-server[8925]: OUTPUT: "Iris-versicolor"
 
 Great! Our classifier is now productionised. Next, we're going to connect our classifier to a database, and explore some of the more sophisticated workflows you can build using NStack.
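
If you would rather script this test than type ``curl`` commands, the same request can be sent from Python. The following is a minimal sketch, assuming the workflow above is running on ``localhost:8080`` and that the third-party ``requests`` package is installed (it is not part of NStack); note that the prediction itself is written to the NStack log, not returned in the HTTP response.

.. code :: python

    import requests  # third-party HTTP client; `pip install requests` (assumed available)

    # The same sample row we sent with curl: four Doubles from train.csv.
    payload = {"params": [4.7, 1.4, 6.1, 2.9]}

    resp = requests.put("http://localhost:8080/irisendpoint", json=payload)
    print(resp.status_code, resp.text)  # expect an acknowledgement such as "Msg Accepted"
    # The classification result then appears via `nstack log`, as shown above.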
From 52e082a57428e06f610911b2b14536f2f5ef5f1e Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Wed, 29 Mar 2017 13:18:37 +0100
Subject: [PATCH 11/12] updated with new module versioning syntax, fixed a few
 typos, restructured

---
 advanced_start/more.rst           |  77 ++++++++--------
 advanced_start/workflow_power.rst | 143 +++++++++++-------------------
 2 files changed, 93 insertions(+), 127 deletions(-)

diff --git a/advanced_start/more.rst b/advanced_start/more.rst
index 739e57a..75990a0 100644
--- a/advanced_start/more.rst
+++ b/advanced_start/more.rst
@@ -5,9 +5,8 @@ Productionising a Classifier as an NStack Module
 
 So far, we have built and published a Python module with a single function on it, ``numChars``, and built a workflow which connects our function to an HTTP endpoint. This in itself isn't particularly useful, so, now that you've got the gist of how NStack works, it's time to build something more realistic!
 
-In this tutorial, we're going to create and productionise a simple classifier which uses the famous `iris dataset `_.
-
-We're going to train our classifier to classify which species an iris is, given measurements of its sepals and petals. You can find the dataset we're using to train our model `here `_.
+In this tutorial, we're going to create and productionise a simple classifier which uses the famous `iris dataset `_.
+We're going to train our classifier to classify which species an iris is, given measurements of its sepals and petals. You can find the dataset we're using to train our model `here `_.
 
 First, let's look at the format of our data to see how we should approach the problem. We see that we have five fields:
 
@@ -28,46 +27,43 @@ Field Name Description Type
 If we are trying to find the species based on the sepal and petal measurements, this means these measurements are going to be the input to our classifier module, with text being the output. This means we need to write a function in Python which takes four ``Double``\s and returns ``Text``.
 
 Creating your classifier module
-******************************
+-------------------------------
 
-To begin, let's make a new directory called ``irisclassify``, ``cd`` into it, and initialise a new module:
+To begin, let's make a new directory called ``Irisclassify``, ``cd`` into it, and initialise a new Python module:
 
 .. code:: bash
 
-    ~/ $ mkdir irisclassify; cd irisclassify
-    ~/irisclassify/ $ nstack init python
-    python module 'irisclassify' successfully initialised at ~/irisclassify
+    ~/ $ mkdir Irisclassify; cd Irisclassify
+    ~/Irisclassify/ $ nstack init python
+    python module 'Irisclassify' successfully initialised at ~/Irisclassify
 
-Next, let's download our training data so we can use it in our module.
+Next, let's download our training data into this directory so we can use it in our module. We have hosted it for you as a CSV on GitHub.
 
 .. code:: bash
 
-    ~/irisclassify/ $ curl -O https://raw.githubusercontent.com/nstackcom/nstack-examples/master/iris/irisclassify/train.csv
+    ~/Irisclassify/ $ curl -O https://raw.githubusercontent.com/nstackcom/nstack-examples/master/iris/Irisclassify/train.csv
 
 Defining our API
-****************
+----------------
 
 As we know what the input and output of our classifier is going to look like, let's edit the ``api`` section of ``nstack.yaml`` to define our API (i.e. the entry-point into our module). By default, a new module contains a sample function ``numChars``, which we replace with our definition. We're going to call the function we write in Python ``predict``, which means we fill in the ``api`` section of ``nstack.yaml`` as follows:
 
 .. code :: java
 
     api : |
-      interface Default {
-        predict : (Double, Double, Double, Double) -> Text
-      }
+      predict : (Double, Double, Double, Double) -> Text
 
-This means we are exposing a single function ``predict``, which takes a record of four ``Double``\s (the measurements) and returns ``Text`` (the iris species).
+This means we want to productionise a single function, ``predict``, which takes four ``Double``\s (the measurements) and returns ``Text`` (the iris species).
 
 Writing our classifier
 ----------------------
 
 Now that we've defined our API, let's jump into our Python module, which lives in ``service.py``.
 We see that NStack has created a class ``Service``. This is where we add the functions for our module. Right now it also has a sample function in it, ``numChars``, which we can remove.
-Firstly, let's import the libaries we're using.
+Let's import the libraries we're using.
 
 .. code :: python
@@ -106,15 +102,15 @@ Now we can write our ``predict`` function. The second argument, ``inputArr``, is
         return results.item()
 
 Configuring your module
-***********************
+-----------------------
 
-When your module is started, it is run in a Linux container on the NStack server. Because our module uses libraries like ``pandas`` and ``sklearn``, we have to tell NStack to install some extra operating system libraries inside your module's container. NStack lets us specify these in our ``nstack.yaml``` configuration file in the ``packages`` section. Let's add the following packages:
+When your module is started, it is run in a Linux container on the NStack server. Because our module uses libraries like ``pandas`` and ``sklearn``, we have to tell NStack to install some extra operating system libraries inside your module's container. NStack lets us specify these in our ``nstack.yaml`` configuration file in the ``packages`` section. Let's add the following packages:
 
 .. code :: yaml
 
     packages: ['numpy', 'python3-scikit-learn', 'scipy', 'python3-scikit-image', 'python3-pandas']
 
-Additionally, we want to tell NStack to copy our ``train.csv`` file into our module, so we can use it to train our data. ``nstack.yaml`` also has a section for specifying files you'd like to include:
+Additionally, we want to tell NStack to copy our ``train.csv`` file into our module, so we can use it in ``__init__``. ``nstack.yaml`` also has a section for specifying files you'd like to include:
 
 .. code :: yaml
 
     files: ['train.csv']
 
 Publishing and starting
-***********************
+-----------------------
 
-Now we're ready to build and publish our classifier. Remember, even though we run this command locally, our module gets built and published to your NStack Server.
+Now we're ready to build and publish our classifier. Remember, even though we run this command locally, our module gets built and published on your NStack server in the cloud.
 
 .. code :: bash
 
-    ~/irisclassify/ $ nstack build
-    Building NStack Container module irisclassify. Please wait. This may take some time.
-    Module irisclassify built successfully. Use `nstack list functions` to see all available functions.
+    ~/Irisclassify/ $ nstack build
+    Building NStack Container module Irisclassify. Please wait. This may take some time.
+    Module Irisclassify built successfully. Use `nstack list functions` to see all available functions.
 
-We can now see ``irisclassify.predict`` in the list of existing functions (along with previously built functions like `demo.numChars`) by running the suggested command
+We can now see ``Irisclassify.predict`` in the list of existing functions (along with previously built functions like ``demo.numChars``):
 
 .. code :: bash
 
     ~/irisclassify/ $ nstack list functions
-    irisclassify.predict : (Double, Double, Double, Double) -> Text
+    Irisclassify.predict : (Double, Double, Double, Double) -> Text
     demo.numChars : Text -> Integer
 
-Our classifier is now published, but to use it we need to connect it to an event source and sink. In the previous tutorial, we used HTTP as a source, and the NStack log as a sink. We can do the same here by starting the following workflow.
+Our classifier is now published, but to use it we need to connect it to an event source and sink. In the previous tutorial, we used HTTP as a source, and the NStack log as a sink. We can do the same here. This time, instead of creating a workflow module right away, we can use nstack's ``notebook`` command to test our workflow first. ``notebook`` opens an interactive shell where we can write our workflow. When we are finished, we can press ``Ctrl-D``.
 
 .. code :: bash
 
-    ~/Irisclassify/ $ nstack start "sources.http : (Double, Double, Double, Double) { http_path : "/irisendpoint" } | irisclassify.predict | sinks.log : Text"
+    ~/Irisclassify/ $ nstack notebook
+    import Irisclassify:0.0.1-SNAPSHOT as Classifier
+    Sources.http<(Double, Double, Double, Double)> { http_path = "/irisendpoint" } | Classifier.predict | Sinks.log
+    [Ctrl-D]
 
+This creates an HTTP endpoint on ``http://localhost:8080/irisendpoint`` which can receive four ``Double``\s, and writes the results to the log as ``Text``. Let's check it is running as a process:
 
+.. code :: bash
+
+    ~/Irisclassify/ $ nstack ps
+    1
+    2
 
-This creates an HTTP endpoint on ``http://localhost:8080/irisendpoint`` which can receive four ``Double``\s, and writes the results to the log as ``Text``. We can test our classifier by sending it some of the sample data from ``train.csv``:
+In this instance, it is running as process ``2``. We can test our classifier by sending it some of the sample data from ``train.csv``.
 
 .. code :: bash
 
     ~/Irisclassify/ $ curl -X PUT -d '{ "params" : [4.7, 1.4, 6.1, 2.9] }' localhost:8080/irisendpoint
     Msg Accepted
     ~/Irisclassify/ $ nstack log 2
     Feb 17 10:32:30 nostromo nstack-server[8925]: OUTPUT: "Iris-versicolor"
 
-Great! Our classifier is now productionised. Next, we're going to connect our classifier to a database, and explore some of the more sophisticated workflows you can build using NStack.
+Our classifier is now productionised. Next, we're going to explore some of the more sophisticated workflows you can build using NStack.
diff --git a/advanced_start/workflow_power.rst b/advanced_start/workflow_power.rst
index ee9ba8b..b944960 100644
--- a/advanced_start/workflow_power.rst
+++ b/advanced_start/workflow_power.rst
@@ -3,155 +3,114 @@
 More Powerful Workflows
 =======================
 
-Now that we have published our iris classifier to NStack as a module, we can use it to demonstrate some of the more powerful features of the workflow engine.
+Now that we have published our classifier to NStack as a module, we can use it to demonstrate some of the more powerful features of the workflow engine.
 
-Workflows as modules
-********************
-
-In the previous examples, we composed workflows directly on the command line using the following format:
-
-.. code :: bash
-
-    $ nstack start "source | module.function | sink"
-
-
-This is useful for testing, but is limited in a few ways:
-
-- Workflows can't be shared or versioned
-- Writing long configuration parameters on the command line is tedious
-- It doesn't allow workflows to be composed together (more on this next)
-
-One of NStack's most powerful features is that it supports creating workflow `module`\s, which can host one or more workflows. When you write workflow modules, workflows can be composed together, combined, versioned, updated, and shared -- just like functions. In fact, a workflow `is` a function on NStack.
-
-To create a workflow module, we create a new directory and use the ``init`` command.
-
-.. code :: bash
-
-    ~/ $ mkdir irisworkflow; cd irisworkflow
-    ~/irisworkflow/ $ nstack init workflow
-    Workflow module 'irisworkflow' successfully initialised at /var/home/fedora/irisworkflow
-
-NStack creates a single file, ``workflow.nml``, which contains a sample workflow.
-
-.. code :: java
-
-    module irisworkflow {
-      def w = sources.http : Text { http_path = "/foo" } | Module1.numChars | sinks.log : Integer;
-    }
-
-In this sample, we have a module called ``irisworkflow`` which has a single workflow on it, ``w``. If we replace ``Module.numChars`` with the function from our previous tutorial, ``demo.numChars``, we can then build this workflow with ``nstack build``.
-
-
-.. code :: bash
-
-    ~/irisworkflow/ $ nstack build
-    Building NStack Workflow module irisworkflow.
-    Workflow module irisworkflow built successfully.
-
-And now start this directly with the start command:
-
-.. code :: bash
-
-    ~/irisworkflow/ $ nstack start irisworkflow.w
-
-Multiple steps
-**************
+Multiple Steps
+---------------
 
 Workflows can contain as many steps as you like, as long as the output type of one matches the input type of the other. For instance, let's say we wanted to create the following workflow:
 
 - Expose an HTTP endpoint which takes four ``Double``\s
-- Send these ``Double``\s to our classifier, ``irisclassify``, which will tell us the species of the iris
-- Count the number of characters in the species of the iris
+- Send these ``Double``\s to our classifier, ``Irisclassify``, which will tell us the species of the iris
+- Count the number of characters in the species of the iris using our ``Demo.numChars`` function
 - Write the result to the log
 
 We could write the following workflow:
 
 .. code :: bash
 
-    module irisworkflow {
-      def multipleSteps = sources.http (Double, Double, Double, Double) { http_path = "/irisendpoint" } | irisclassify.predict | demo.numChars | sinks.log : Integer;
+    module Irisworkflow:0.0.1-SNAPSHOT {
+      import Irisclassify:0.0.1-SNAPSHOT as Classifier;
+      import Demo:0.0.1-SNAPSHOT as Demo;
+
+      def multipleSteps = Sources.http<(Double, Double, Double, Double)> { http_path = "/irisendpoint" } | Classifier.predict | Demo.numChars | Sinks.log;
     }
 
 .. note :: ``numChars`` and ``predict`` can be `composed` together because their types -- or schemas -- match. If ``predict`` wasn't configured to output ``Text``, or ``numChars`` wasn't configured to take ``Text`` as input, NStack would not let you build this workflow.
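
To build intuition for this typing rule, here is a rough Python analogy (an illustrative sketch only -- NStack does not execute workflows this way, and the function bodies below are hypothetical placeholders): each step is a function, and piping is ordinary composition, which only works when adjacent types line up.

.. code :: python

    from typing import Tuple

    Measurements = Tuple[float, float, float, float]

    def predict(m: Measurements) -> str:
        # Stands in for Classifier.predict : (Double, Double, Double, Double) -> Text
        return "Iris-versicolor"  # placeholder result

    def num_chars(s: str) -> int:
        # Stands in for Demo.numChars : Text -> Integer
        return len(s)

    def multiple_steps(m: Measurements) -> int:
        # predict outputs str and num_chars accepts str, so the chain composes;
        # change either type and the pipeline would no longer fit together.
        return num_chars(predict(m))

    print(multiple_steps((4.7, 1.4, 6.1, 2.9)))  # prints 15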
 Partial workflows
-*****************
+-----------------
 
 All of the workflows that we have written so far have been `fully composed`, which means that they contain a source and a sink. Many times, you want to split up sources, sinks, and functions into separate pieces you can share and reuse. In this case, we say that a workflow is `partially composed`, which just means it does not contain a source and a sink. These workflows cannot be ``start``\ed by themselves, but can be shared and attached to other sources and/or sinks to become `fully composed`.
 
-For instance, we could combine ``irisclassify.predict`` and ``demo.numChars`` from the previous example to form a new workflow ``speciesLength`` like so:
+For instance, we could combine ``Irisclassify.predict`` and ``demo.numChars`` from the previous example to form a new workflow ``speciesLength`` like so:
 
 .. code :: java
 
-    module irisworkflow {
-      // A sample workflow
-      def speciesLength = irisclassify.predict | demo.numChars
+    module Irisworkflow:0.0.1-SNAPSHOT {
+      import Irisclassify:0.0.1-SNAPSHOT as Classifier;
+      import Demo:0.0.1-SNAPSHOT as Demo;
+
+      def speciesLength = Classifier.predict | Demo.numChars
     }
 
-Because our workflow ``irisworkflow.speciesLength`` has not been connected to a source or a sink, is is technically a function. If we build this workflow, we can see ``speciesLength`` alongside our other functions by using the ``list`` command:
+Because our workflow ``Irisworkflow.speciesLength`` has not been connected to a source or a sink, it is, in itself, still a function. If we build this workflow, we can see ``speciesLength`` alongside our other functions by using the ``list`` command:
 
 .. code :: bash
 
-    ~/irisworkflow/ $ nstack list functions
-    irisclassify.predict : (Double, Double, Double, Double) -> Text
-    demo.numChars : Text -> Integer
-    irisworkflow.speciesLength : (Double, Double, Double, Double) -> Integer
+    ~/Irisworkflow/ $ nstack list functions
+    Irisclassify:0.0.1-SNAPSHOT
+        predict : (Double, Double, Double, Double) -> Text
+    Demo:0.0.1-SNAPSHOT
+        numChars : Text -> Integer
+    Irisworkflow:0.0.1-SNAPSHOT
+        speciesLength : (Double, Double, Double, Double) -> Integer
 
-As we would expect, the input type of the workflow is the input type of ``irisclassify.predict``, and the output type is the output type of ``demo.numChars``. Like other functions, this must be connected to a source and a sink to make it `fully composed` -- and thus something that can be started with the ``start`` command:
+As we would expect, the input type of the workflow is the input type of ``Irisclassify.predict``, and the output type is the output type of ``demo.numChars``. Like other functions, this must be connected to a source and a sink to make it `fully composed`, which means we could use this workflow in *another* workflow.
 
 .. code :: bash
 
-    ~/irisworkflow/ $ nstack start 'sources.http : (Double, Double, Double, Double) { http_path = "speciesLength" } | irisworkflow.speciesLength | sink.log : Integer'
+    module Irisendpoint:0.0.1-SNAPSHOT {
+      import Irisworkflow:0.0.1-SNAPSHOT as IrisWF;
+      def http = Sources.http<(Double, Double, Double, Double)> | IrisWF.speciesLength | Sinks.log;
+    }
 
 Oftentimes you want to re-use a source or a sink without reconfiguring them. To do this, we can similarly separate the sources and sinks into separate workflows, like so:
 
 .. code :: java
 
-    module irisworkflow {
-      def httpEndpoint = sources.http : (Double, Double, Double, Double) { http_path = "speciesLength" };
-      def logSink = sinks.log : Text;
-      def speciesWf = httpEndpoint | irisclassify.predict | logSink;
+    module Irisworkflow:0.0.1-SNAPSHOT {
+      import Irisclassify:0.0.1-SNAPSHOT as Classifier
+
+      def httpEndpoint = Sources.http<(Double, Double, Double, Double)> { http_path = "speciesLength" };
+      def logSink = Sinks.log;
+
+      def speciesWf = httpEndpoint | Classifier.predict | logSink;
     }
 
-Separating sources and sinks becomes useful when you're connecting to more complex middleware which you don't want to configure each time you use it -- many times you want to reuse a source or sink in multiple workflows. So far we have used HTTP as a source, and the log as a sink, but NStack supports many other integrations.
+Separating sources and sinks becomes useful when you're connecting to more complex integrations which you don't want to configure each time you use it -- many times you want to reuse a source or sink in multiple workflows. In the following example, we are defining a module which provides a source and a sink which both sit on top of Postgres.
 
 .. code :: java
 
-    module irisDatabases {
-      def petalsAndSepals = sources.postgres : (Double, Double, Double, Double) {
+    module IrisDatabases:0.0.1-SNAPSHOT {
+      def petalsAndSepals = Sources.postgres<(Double, Double, Double, Double)> {
         pg_database = "flowers",
        pg_query = "SELECT * FROM iris"
       };
 
-      def irisSpecies = sinks.postgres : Text {
+      def irisSpecies = Sinks.postgres {
         pg_database = "flowers",
         pg_table = "iris"
       };
     }
 
-If we built, this module, ``irisDatabases.petalsAndSepals`` and ``irisDatbases.irisSpecies`` could be used other modules as sources and sinks.
+If we built this module, ``petalsAndSepals`` and ``irisSpecies`` could themselves be used in other modules as sources and sinks.
 
-We may also want to add a module to do some pre- or post- processing to a source or sink. For instance:
+We may also want to add functions to do some pre- or post-processing to a source or sink. For instance:
 
 .. code :: java
 
-    module irisDatabases {
-      def petalsAndSepals = sources.postgres : (Double, Double, Double, Double) {
-        pg_database = "flowers",
-        pg_query = "SELECT * FROM iris"
-      };
-
-      def irisSpecies = sinks.postgres : Text {
-        pg_database = "flowers",
-        pg_table = "iris"
-      };
-
-      def roundedPetalsSource = petalsAndSepals | PetalsTools.roundPetalLengths;
-      def irisSpeciesUppercase = TextTools.toUppercase | irisSpecies;
+    module IrisCleanDbs:0.0.1-SNAPSHOT {
+
+      import PetalTools:1.0.0 as PetalTools;
+      import TextTools:1.1.2 as TextTools;
+      import IrisDatabases:0.0.1-SNAPSHOT as DB;
+
+      def roundedPetalsSource = DB.petalsAndSepals | PetalTools.roundPetalLengths;
+      def irisSpeciesUppercase = TextTools.toUppercase | DB.irisSpecies;
     }
 
 Because ``roundedPetalsSource`` is a combination of a source and a function, it is still a valid source. Similarly, ``irisSpeciesUppercase`` is a combination of a function and a sink, so it is still a valid sink.
 
-.. note :: Composition rules: a function combined with another function is still a function, a source combined with a function is still a source, and a function combined with a sink is still a sink.
+Because NStack functions, sources, and sinks can be composed and reused, this lets you build powerful abstractions over infrastructure.
 
-This means you can find them in your list of sources and sinks using ``list``.
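
As a rough mental model for these composition rules, consider the following Python sketch (an analogy only, not NStack's API): if a source is a stream of values, then piping it through a per-item function yields another stream -- still a source -- and, symmetrically, pre-processing the input to a sink still leaves you with a sink.

.. code :: python

    from typing import Iterator, Tuple

    Row = Tuple[float, ...]

    def petals_and_sepals() -> Iterator[Row]:
        # Plays the role of a source: a stream of measurement rows.
        yield (4.7, 1.4, 6.1, 2.9)
        yield (5.1, 3.5, 1.4, 0.2)

    def round_lengths(row: Row) -> Row:
        # Plays the role of a function applied to each element of the stream.
        return tuple(round(x) for x in row)

    def rounded_petals_source() -> Iterator[Row]:
        # A source piped through a function still yields rows: it is still a source.
        return (round_lengths(r) for r in petals_and_sepals())

    for row in rounded_petals_source():
        print(row)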
From 10347f49b989cb60dff47f09d61cd0c58c664be0 Mon Sep 17 00:00:00 2001
From: Leo Anthias
Date: Wed, 29 Mar 2017 13:27:47 +0100
Subject: [PATCH 12/12] Updated module names to use dots

---
 advanced_start/more.rst           | 34 +++++++++++++++----------------
 advanced_start/workflow_power.rst | 34 +++++++++++++++----------------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/advanced_start/more.rst b/advanced_start/more.rst
index 45d5010..42d4f06 100644
--- a/advanced_start/more.rst
+++ b/advanced_start/more.rst
@@ -6,7 +6,7 @@ Productionising a Classifier as an NStack Module
 
 So far, we have built and published a Python module with a single function on it, ``numChars``, and built a workflow which connects our function to an HTTP endpoint. This in itself isn't particularly useful, so, now that you've got the gist of how NStack works, it's time to build something more realistic!
 
 In this tutorial, we're going to create and productionise a simple classifier which uses the famous `iris dataset `_.
-We're going to train our classifier to classify which species an iris is, given measurements of its sepals and petals. You can find the dataset we're using to train our model `here `_.
+We're going to train our classifier to classify which species an iris is, given measurements of its sepals and petals. You can find the dataset we're using to train our model `here `_.
 
 First, let's look at the format of our data to see how we should approach the problem. We see that we have five fields:
 
@@ -29,19 +29,19 @@ If we are trying to find the species based on the sepal and petal measurements,
 
 Creating your classifier module
 -------------------------------
 
-To begin, let's make a new directory called ``Irisclassify``, ``cd`` into it, and initialise a new Python module:
+To begin, let's make a new directory called ``Iris.Classify``, ``cd`` into it, and initialise a new Python module:
 
 .. code:: bash
 
-    ~/ $ mkdir Irisclassify; cd Irisclassify
-    ~/Irisclassify/ $ nstack init python
-    python module 'Irisclassify' successfully initialised at ~/Irisclassify
+    ~/ $ mkdir Iris.Classify; cd Iris.Classify
+    ~/Iris.Classify/ $ nstack init python
+    python module 'Iris.Classify' successfully initialised at ~/Iris.Classify
 
 Next, let's download our training data into this directory so we can use it in our module. We have hosted it for you as a CSV on GitHub.
 
 .. code:: bash
 
-    ~/Irisclassify/ $ curl -O https://raw.githubusercontent.com/nstackcom/nstack-examples/master/iris/Irisclassify/train.csv
+    ~/Iris.Classify/ $ curl -O https://raw.githubusercontent.com/nstackcom/nstack-examples/master/iris/Iris.Classify/train.csv
 
 Defining our API
 ----------------
@@ -123,16 +123,16 @@ Now we're ready to build and publish our classifier. Remember, even though we ru
 
 .. code :: bash
 
-    ~/Irisclassify/ $ nstack build
-    Building NStack Container module Irisclassify. Please wait. This may take some time.
-    Module Irisclassify built successfully. Use `nstack list functions` to see all available functions.
+    ~/Iris.Classify/ $ nstack build
+    Building NStack Container module Iris.Classify. Please wait. This may take some time.
+    Module Iris.Classify built successfully. Use `nstack list functions` to see all available functions.
 
-We can now see ``Irisclassify.predict`` in the list of existing functions (along with previously built functions like ``demo.numChars``),
+We can now see ``Iris.Classify.predict`` in the list of existing functions (along with previously built functions like ``demo.numChars``):
 
 .. code :: bash
 
-    ~/irisclassify/ $ nstack list functions
-    Irisclassify:0.0.1-SNAPSHOT
+    ~/Iris.Classify/ $ nstack list functions
+    Iris.Classify:0.0.1-SNAPSHOT
         predict : (Double, Double, Double, Double) -> Text
     Demo:0.0.1-SNAPSHOT
         numChars : Text -> Integer
@@ -141,7 +141,7 @@ Our classifier is now published, but to use it we need to connect it to an event
 
 .. code :: bash
 
-    ~/Irisclassify/ $ nstack notebook
-    import Irisclassify:0.0.1-SNAPSHOT as Classifier
+    ~/Iris.Classify/ $ nstack notebook
+    import Iris.Classify:0.0.1-SNAPSHOT as Classifier
     Sources.http<(Double, Double, Double, Double)> { http_path = "/irisendpoint" } | Classifier.predict | Sinks.log
     [Ctrl-D]
@@ -150,7 +150,7 @@ This creates an HTTP endpoint on ``http://localhost:8080/irisendpoint`` which ca
 
-    ~/Irisclassify/ $ nstack ps
+    ~/Iris.Classify/ $ nstack ps
     1
     2
@@ -158,9 +158,9 @@ In this instance, it is running as process ``2``. We can test our classifier by
 
-    ~/Irisclassify/ $ curl -X PUT -d '{ "params" : [4.7, 1.4, 6.1, 2.9] }' localhost:8080/irisendpoint
+    ~/Iris.Classify/ $ curl -X PUT -d '{ "params" : [4.7, 1.4, 6.1, 2.9] }' localhost:8080/irisendpoint
     Msg Accepted
-    ~/Irisclassify/ $ nstack log 2
+    ~/Iris.Classify/ $ nstack log 2
     Feb 17 10:32:30 nostromo nstack-server[8925]: OUTPUT: "Iris-versicolor"
 
 Our classifier is now productionised. Next, we're going to explore some of the more sophisticated workflows you can build using NStack.
diff --git a/advanced_start/workflow_power.rst b/advanced_start/workflow_power.rst
index b944960..e82825f 100644
--- a/advanced_start/workflow_power.rst
+++ b/advanced_start/workflow_power.rst
@@ -11,7 +11,7 @@ Multiple Steps
 
 Workflows can contain as many steps as you like, as long as the output type of one matches the input type of the other. For instance, let's say we wanted to create the following workflow:
 
 - Expose an HTTP endpoint which takes four ``Double``\s
-- Send these ``Double``\s to our classifier, ``Irisclassify``, which will tell us the species of the iris
+- Send these ``Double``\s to our classifier, ``Iris.Classify``, which will tell us the species of the iris
 - Count the number of characters in the species of the iris using our ``Demo.numChars`` function
 - Write the result to the log
 
 We could write the following workflow:
 
 .. code :: bash
 
-    module Irisworkflow:0.0.1-SNAPSHOT {
-      import Irisclassify:0.0.1-SNAPSHOT as Classifier;
+    module Iris.Workflow:0.0.1-SNAPSHOT {
+      import Iris.Classify:0.0.1-SNAPSHOT as Classifier;
       import Demo:0.0.1-SNAPSHOT as Demo;
 
       def multipleSteps = Sources.http<(Double, Double, Double, Double)> { http_path = "/irisendpoint" } | Classifier.predict | Demo.numChars | Sinks.log;
     }
@@ -33,35 +33,35 @@ Partial workflows
 
 All of the workflows that we have written so far have been `fully composed`, which means that they contain a source and a sink. Many times, you want to split up sources, sinks, and functions into separate pieces you can share and reuse. In this case, we say that a workflow is `partially composed`, which just means it does not contain a source and a sink. These workflows cannot be ``start``\ed by themselves, but can be shared and attached to other sources and/or sinks to become `fully composed`.
 
-For instance, we could combine ``Irisclassify.predict`` and ``demo.numChars`` from the previous example to form a new workflow ``speciesLength`` like so:
+For instance, we could combine ``Iris.Classify.predict`` and ``demo.numChars`` from the previous example to form a new workflow ``speciesLength`` like so:
 
 .. code :: java
 
-    module Irisworkflow:0.0.1-SNAPSHOT {
-      import Irisclassify:0.0.1-SNAPSHOT as Classifier;
+    module Iris.Workflow:0.0.1-SNAPSHOT {
+      import Iris.Classify:0.0.1-SNAPSHOT as Classifier;
       import Demo:0.0.1-SNAPSHOT as Demo;
 
       def speciesLength = Classifier.predict | Demo.numChars
     }
 
-Because our workflow ``Irisworkflow.speciesLength`` has not been connected to a source or a sink, it is, in itself, still a function. If we build this workflow, we can see ``speciesLength`` alongside our other functions by using the ``list`` command:
+Because our workflow ``Iris.Workflow.speciesLength`` has not been connected to a source or a sink, it is, in itself, still a function. If we build this workflow, we can see ``speciesLength`` alongside our other functions by using the ``list`` command:
 
 .. code :: bash
 
-    ~/Irisworkflow/ $ nstack list functions
-    Irisclassify:0.0.1-SNAPSHOT
+    ~/Iris.Workflow/ $ nstack list functions
+    Iris.Classify:0.0.1-SNAPSHOT
         predict : (Double, Double, Double, Double) -> Text
     Demo:0.0.1-SNAPSHOT
         numChars : Text -> Integer
-    Irisworkflow:0.0.1-SNAPSHOT
+    Iris.Workflow:0.0.1-SNAPSHOT
         speciesLength : (Double, Double, Double, Double) -> Integer
 
-As we would expect, the input type of the workflow is the input type of ``Irisclassify.predict``, and the output type is the output type of ``demo.numChars``. Like other functions, this must be connected to a source and a sink to make it `fully composed`, which means we could use this workflow in *another* workflow.
+As we would expect, the input type of the workflow is the input type of ``Iris.Classify.predict``, and the output type is the output type of ``demo.numChars``. Like other functions, this must be connected to a source and a sink to make it `fully composed`, which means we could use this workflow in *another* workflow.
 
 .. code :: bash
 
-    module Irisendpoint:0.0.1-SNAPSHOT {
-      import Irisworkflow:0.0.1-SNAPSHOT as IrisWF;
+    module Iris.Endpoint:0.0.1-SNAPSHOT {
+      import Iris.Workflow:0.0.1-SNAPSHOT as IrisWF;
       def http = Sources.http<(Double, Double, Double, Double)> | IrisWF.speciesLength | Sinks.log;
     }
@@ -69,7 +69,7 @@ Often times you want to re-use a source or a sink without reconfiguring them. To
 
 .. code :: java
 
-    module Irisworkflow:0.0.1-SNAPSHOT {
-      import Irisclassify:0.0.1-SNAPSHOT as Classifier
+    module Iris.Workflow:0.0.1-SNAPSHOT {
+      import Iris.Classify:0.0.1-SNAPSHOT as Classifier
 
       def httpEndpoint = Sources.http<(Double, Double, Double, Double)> { http_path = "speciesLength" };
       def logSink = Sinks.log;
 
       def speciesWf = httpEndpoint | Classifier.predict | logSink;
     }
@@ -82,7 +82,7 @@ Separating sources and sinks becomes useful when you're connecting to more compl
 
 .. code :: java
 
-    module IrisDatabases:0.0.1-SNAPSHOT {
+    module Iris.DB:0.0.1-SNAPSHOT {
       def petalsAndSepals = Sources.postgres<(Double, Double, Double, Double)> {
         pg_database = "flowers",
         pg_query = "SELECT * FROM iris"
       };
 
       def irisSpecies = Sinks.postgres {
         pg_database = "flowers",
         pg_table = "iris"
       };
     }
@@ -104,7 +104,7 @@ We may also want to add a functions to do some pre- or post- processing to a sou
 
       import PetalTools:1.0.0 as PetalTools;
       import TextTools:1.1.2 as TextTools;
-      import IrisDatabases:0.0.1-SNAPSHOT as DB;
+      import Iris.DB:0.0.1-SNAPSHOT as DB;
 
       def roundedPetalsSource = DB.petalsAndSepals | PetalTools.roundPetalLengths;
       def irisSpeciesUppercase = TextTools.toUppercase | DB.irisSpecies;