diff --git a/sdk/python/v1beta1/examples/cmaes-and-resume-policies.ipynb b/sdk/python/v1beta1/examples/cmaes-and-resume-policies.ipynb index 11cf5d8c1e9..e9c11afdb4d 100644 --- a/sdk/python/v1beta1/examples/cmaes-and-resume-policies.ipynb +++ b/sdk/python/v1beta1/examples/cmaes-and-resume-policies.ipynb @@ -6,9 +6,9 @@ "source": [ "# HyperParameter tunning using CMA-ES\n", "\n", - "In this example you will deploy 3 Katib experiments with Covariance Matrix Adaptation Evolution Strategy (CMA-ES) using Jupyter Notebook and Katib SDK. These experiments have various resume policies.\n", + "In this example you will deploy 3 Katib Experiments with Covariance Matrix Adaptation Evolution Strategy (CMA-ES) using Jupyter Notebook and Katib SDK. These Experiments have various resume policies.\n", "\n", - "The notebook shows how to create, get, check status and delete experiment." + "The notebook shows how to create, get, check status and delete an Experiment." ] }, { @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -28,40 +28,40 @@ "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", - "Collecting kubeflow-katib\n", - " Downloading kubeflow_katib-0.0.5-py3-none-any.whl (112 kB)\n", - "\u001b[K |████████████████████████████████| 112 kB 34.4 MB/s eta 0:00:01\n", + "Collecting kubeflow-katib==0.10.1\n", + " Downloading kubeflow_katib-0.10.1-py3-none-any.whl (113 kB)\n", + "\u001b[K |████████████████████████████████| 113 kB 28.0 MB/s eta 0:00:01\n", "\u001b[?25hCollecting table-logger>=0.3.5\n", " Downloading table_logger-0.3.6-py3-none-any.whl (14 kB)\n", - "Requirement already satisfied: kubernetes==10.0.1 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (10.0.1)\n", - "Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kubeflow-katib) (1.11.0)\n", - "Requirement already satisfied: 
certifi>=14.05.14 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (2019.11.28)\n", - "Requirement already satisfied: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (45.1.0)\n", - "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (2.8.1)\n", - "Requirement already satisfied: urllib3>=1.15.1 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (1.25.8)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from table-logger>=0.3.5->kubeflow-katib) (1.18.1)\n", - "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (0.57.0)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (2.22.0)\n", - "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (1.3.0)\n", - "Requirement already satisfied: pyyaml>=3.12 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (5.3)\n", - "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (1.11.0)\n", - "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kubernetes==10.0.1->kubeflow-katib) (3.0.4)\n", - "Requirement already satisfied: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->kubernetes==10.0.1->kubeflow-katib) (2.6)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes==10.0.1->kubeflow-katib) (3.1.0)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from 
google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib) (0.2.8)\n", - "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib) (4.0)\n", - "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib) (4.0.0)\n", - "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib) (0.4.8)\n", + "Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) (2019.11.28)\n", + "Requirement already satisfied: urllib3>=1.15.1 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) (1.25.8)\n", + "Requirement already satisfied: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) (45.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) (2.8.1)\n", + "Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kubeflow-katib==0.10.1) (1.11.0)\n", + "Requirement already satisfied: kubernetes==10.0.1 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) (10.0.1)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from table-logger>=0.3.5->kubeflow-katib==0.10.1) (1.18.1)\n", + "Requirement already satisfied: pyyaml>=3.12 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib==0.10.1) (5.3)\n", + "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib==0.10.1) (0.57.0)\n", + "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from 
kubernetes==10.0.1->kubeflow-katib==0.10.1) (1.11.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib==0.10.1) (2.22.0)\n", + "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib==0.10.1) (1.3.0)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib==0.10.1) (4.0.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib==0.10.1) (0.2.8)\n", + "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib==0.10.1) (4.0)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->kubernetes==10.0.1->kubeflow-katib==0.10.1) (2.6)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kubernetes==10.0.1->kubeflow-katib==0.10.1) (3.0.4)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes==10.0.1->kubeflow-katib==0.10.1) (3.1.0)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.6/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib==0.10.1) (0.4.8)\n", "Installing collected packages: table-logger, kubeflow-katib\n", - "Successfully installed kubeflow-katib-0.0.5 table-logger-0.3.6\n", - "\u001b[33mWARNING: You are using pip version 20.0.2; however, version 20.2.3 is available.\n", + "Successfully installed kubeflow-katib-0.10.1 table-logger-0.3.6\n", + "\u001b[33mWARNING: You are using pip version 20.0.2; however, version 20.3 is available.\n", "You 
should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ - "pip install kubeflow-katib" + "pip install kubeflow-katib==0.10.1" ] }, { @@ -123,9 +123,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Define experiment\n", + "## Define your Experiment\n", "\n", - "You have to create experiment object before deploying it. This experiment is similar to [this](https://github.com/kubeflow/katib/blob/master/examples/v1beta1/cmaes-example.yaml) example." + "You have to create your Experiment object before deploying it. This Experiment is similar to [this](https://github.com/kubeflow/katib/blob/master/examples/v1beta1/cmaes-example.yaml) example." ] }, { @@ -134,7 +134,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Experiment metadata\n", + "# Experiment name and namespace.\n", "namespace = \"anonymous\"\n", "experiment_name = \"cmaes-example\"\n", "\n", @@ -143,12 +143,12 @@ " namespace=namespace\n", ")\n", "\n", - "# Algorithm specification\n", + "# Algorithm specification.\n", "algorithm_spec=V1beta1AlgorithmSpec(\n", " algorithm_name=\"cmaes\"\n", ")\n", "\n", - "# Objective specification\n", + "# Objective specification.\n", "objective_spec=V1beta1ObjectiveSpec(\n", " type=\"maximize\",\n", " goal= 0.99,\n", @@ -185,17 +185,22 @@ "\n", "\n", "\n", - "# JSON trial template specification\n", + "# JSON template specification for the Trial's Worker Kubernetes Job.\n", "trial_spec={\n", " \"apiVersion\": \"batch/v1\",\n", " \"kind\": \"Job\",\n", " \"spec\": {\n", " \"template\": {\n", + " \"metadata\": {\n", + " \"annotations\": {\n", + " \"sidecar.istio.io/inject\": \"false\"\n", + " }\n", + " },\n", " \"spec\": {\n", " \"containers\": [\n", " {\n", " \"name\": \"training-container\",\n", - " \"image\": \"docker.io/kubeflowkatib/mxnet-mnist:v1beta1-e294a90\",\n", + " \"image\": 
\"docker.io/kubeflowkatib/mxnet-mnist:v1beta1-91e4996\",\n", " \"command\": [\n", " \"python3\",\n", " \"/opt/mxnet-mnist/mnist.py\",\n", @@ -212,8 +217,9 @@ " }\n", "}\n", "\n", - "# Template with trial parameters and trial spec\n", + "# Configure parameters for the Trial template.\n", "trial_template=V1beta1TrialTemplate(\n", + " primary_container_name=\"training-container\",\n", " trial_parameters=[\n", " V1beta1TrialParameterSpec(\n", " name=\"learningRate\",\n", @@ -235,7 +241,7 @@ ")\n", "\n", "\n", - "# Experiment object\n", + "# Experiment object.\n", "experiment = V1beta1Experiment(\n", " api_version=\"kubeflow.org/v1beta1\",\n", " kind=\"Experiment\",\n", @@ -256,32 +262,32 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Define experiments with resume policy\n", + "# Define Experiments with resume policy\n", "\n", - "We will define another 2 experiments with ResumePolicy = Never and ResumePolicy = FromVolume.\n", + "We will define another 2 Experiments with ResumePolicy = Never and ResumePolicy = FromVolume.\n", "\n", - "Experiment with _Never_ resume policy can't be resumed, suggestion resources will be deleted.\n", + "Experiment with _Never_ resume policy can't be resumed, the Suggestion resources will be deleted.\n", "\n", - "Experiment with _FromVolume_ resume policy can be resumed, volume is attached to suggestion. PVC and PV should be created for suggestion." + "Experiment with _FromVolume_ resume policy can be resumed, volume is attached to the Suggestion. Suggestion's PVC and PV should be created for the Suggestion." 
] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "experiment_never_resume_name = \"never-resume-cmaes\"\n", "experiment_from_volume_resume_name = \"from-volume-resume-cmaes\"\n", "\n", - "# Create new experiments from previous experiment info\n", - "# Define experiment with never resume\n", + "# Create new Experiments from the previous Experiment info.\n", + "# Define Experiment with never resume.\n", "experiment_never_resume = copy.deepcopy(experiment)\n", "experiment_never_resume.metadata.name = experiment_never_resume_name\n", "experiment_never_resume.spec.resume_policy = \"Never\"\n", "experiment_never_resume.spec.max_trial_count = 4\n", "\n", - "# Define experiment with from volume resume\n", + "# Define Experiment with from volume resume.\n", "experiment_from_volume_resume = copy.deepcopy(experiment)\n", "experiment_from_volume_resume.metadata.name = experiment_from_volume_resume_name\n", "experiment_from_volume_resume.spec.resume_policy = \"FromVolume\"\n", @@ -292,12 +298,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can print experiment's info to verify it before submission" + "You can print the Experiment's info to verify it before submission." ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": { "scrolled": true }, @@ -332,14 +338,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Create experiment\n", + "# Create your Experiment\n", "\n", - "You have to create Katib client to use SDK" + "You have to create a Katib client to use the SDK." 
] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "metadata": { "scrolled": false }, @@ -361,13 +367,13 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1beta1',\n", " 'kind': 'Experiment',\n", - " 'metadata': {'creationTimestamp': '2020-09-14T23:15:47Z',\n", + " 'metadata': {'creationTimestamp': '2020-11-30T19:02:20Z',\n", " 'generation': 1,\n", " 'name': 'cmaes-example',\n", " 'namespace': 'anonymous',\n", - " 'resourceVersion': '127102635',\n", + " 'resourceVersion': '170779217',\n", " 'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/anonymous/experiments/cmaes-example',\n", - " 'uid': '68c43a20-6926-4586-9440-6a7930d7712d'},\n", + " 'uid': '6d8b16d3-3778-4fc1-ba3d-d524ec487450'},\n", " 'spec': {'algorithm': {'algorithmName': 'cmaes'},\n", " 'maxFailedTrialCount': 3,\n", " 'maxTrialCount': 7,\n", @@ -389,7 +395,10 @@ " 'name': 'optimizer',\n", " 'parameterType': 'categorical'}],\n", " 'resumePolicy': 'LongRunning',\n", - " 'trialTemplate': {'trialParameters': [{'description': 'Learning rate for the training model',\n", + " 'trialTemplate': {'failureCondition': 'status.conditions.#(type==\"Failed\")#|#(status==\"True\")#',\n", + " 'primaryContainerName': 'training-container',\n", + " 'successCondition': 'status.conditions.#(type==\"Complete\")#|#(status==\"True\")#',\n", + " 'trialParameters': [{'description': 'Learning rate for the training model',\n", " 'name': 'learningRate',\n", " 'reference': 'lr'},\n", " {'description': 'Number of training model layers',\n", @@ -400,27 +409,28 @@ " 'reference': 'optimizer'}],\n", " 'trialSpec': {'apiVersion': 'batch/v1',\n", " 'kind': 'Job',\n", - " 'spec': {'template': {'spec': {'containers': [{'command': ['python3',\n", + " 'spec': {'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'}},\n", + " 'spec': {'containers': [{'command': ['python3',\n", " '/opt/mxnet-mnist/mnist.py',\n", " '--batch-size=64',\n", " '--lr=${trialParameters.learningRate}',\n", " 
'--num-layers=${trialParameters.numberLayers}',\n", " '--optimizer=${trialParameters.optimizer}'],\n", - " 'image': 'docker.io/kubeflowkatib/mxnet-mnist:v1beta1-e294a90',\n", + " 'image': 'docker.io/kubeflowkatib/mxnet-mnist:v1beta1-91e4996',\n", " 'name': 'training-container'}],\n", " 'restartPolicy': 'Never'}}}}}}}" ] }, - "execution_count": 12, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Create client\n", + "# Create client.\n", "kclient = KatibClient()\n", "\n", - "# Create experiment\n", + "# Create your Experiment.\n", "kclient.create_experiment(experiment,namespace=namespace)" ] }, @@ -428,12 +438,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Create other experiments" + "Create other Experiments." ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -465,13 +475,13 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1beta1',\n", " 'kind': 'Experiment',\n", - " 'metadata': {'creationTimestamp': '2020-09-14T23:16:04Z',\n", + " 'metadata': {'creationTimestamp': '2020-11-30T19:02:29Z',\n", " 'generation': 1,\n", " 'name': 'from-volume-resume-cmaes',\n", " 'namespace': 'anonymous',\n", - " 'resourceVersion': '127102800',\n", + " 'resourceVersion': '170779317',\n", " 'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/anonymous/experiments/from-volume-resume-cmaes',\n", - " 'uid': '65173495-e76b-4136-88fa-a688790150cd'},\n", + " 'uid': '20f3cee6-818d-48ff-ad1e-d2c85cee51f1'},\n", " 'spec': {'algorithm': {'algorithmName': 'cmaes'},\n", " 'maxFailedTrialCount': 3,\n", " 'maxTrialCount': 4,\n", @@ -493,7 +503,10 @@ " 'name': 'optimizer',\n", " 'parameterType': 'categorical'}],\n", " 'resumePolicy': 'FromVolume',\n", - " 'trialTemplate': {'trialParameters': [{'description': 'Learning rate for the training model',\n", + " 'trialTemplate': {'failureCondition': 'status.conditions.#(type==\"Failed\")#|#(status==\"True\")#',\n", + " 
'primaryContainerName': 'training-container',\n", + " 'successCondition': 'status.conditions.#(type==\"Complete\")#|#(status==\"True\")#',\n", + " 'trialParameters': [{'description': 'Learning rate for the training model',\n", " 'name': 'learningRate',\n", " 'reference': 'lr'},\n", " {'description': 'Number of training model layers',\n", @@ -504,26 +517,27 @@ " 'reference': 'optimizer'}],\n", " 'trialSpec': {'apiVersion': 'batch/v1',\n", " 'kind': 'Job',\n", - " 'spec': {'template': {'spec': {'containers': [{'command': ['python3',\n", + " 'spec': {'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'}},\n", + " 'spec': {'containers': [{'command': ['python3',\n", " '/opt/mxnet-mnist/mnist.py',\n", " '--batch-size=64',\n", " '--lr=${trialParameters.learningRate}',\n", " '--num-layers=${trialParameters.numberLayers}',\n", " '--optimizer=${trialParameters.optimizer}'],\n", - " 'image': 'docker.io/kubeflowkatib/mxnet-mnist:v1beta1-e294a90',\n", + " 'image': 'docker.io/kubeflowkatib/mxnet-mnist:v1beta1-91e4996',\n", " 'name': 'training-container'}],\n", " 'restartPolicy': 'Never'}}}}}}}" ] }, - "execution_count": 13, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Create experiment with never resume\n", + "# Create Experiment with never resume.\n", "kclient.create_experiment(experiment_never_resume,namespace=namespace)\n", - "# Create experiment with from volume resume\n", + "# Create Experiment with from volume resume.\n", "kclient.create_experiment(experiment_from_volume_resume,namespace=namespace)" ] }, @@ -531,14 +545,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Get experiment\n", + "# Get your Experiment\n", "\n", - "You can get experiment by name and receive required data" + "You can get your Experiment by name and receive required data." 
] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 7, "metadata": { "scrolled": true }, @@ -547,11 +561,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'apiVersion': 'kubeflow.org/v1beta1', 'kind': 'Experiment', 'metadata': {'creationTimestamp': '2020-09-14T23:15:47Z', 'finalizers': ['update-prometheus-metrics'], 'generation': 1, 'name': 'cmaes-example', 'namespace': 'anonymous', 'resourceVersion': '127103016', 'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/anonymous/experiments/cmaes-example', 'uid': '68c43a20-6926-4586-9440-6a7930d7712d'}, 'spec': {'algorithm': {'algorithmName': 'cmaes'}, 'maxFailedTrialCount': 3, 'maxTrialCount': 7, 'metricsCollectorSpec': {'collector': {'kind': 'StdOut'}}, 'objective': {'additionalMetricNames': ['Train-accuracy'], 'goal': 0.99, 'metricStrategies': [{'name': 'Validation-accuracy', 'value': 'max'}, {'name': 'Train-accuracy', 'value': 'max'}], 'objectiveMetricName': 'Validation-accuracy', 'type': 'maximize'}, 'parallelTrialCount': 3, 'parameters': [{'feasibleSpace': {'max': '0.06', 'min': '0.01'}, 'name': 'lr', 'parameterType': 'double'}, {'feasibleSpace': {'max': '5', 'min': '2'}, 'name': 'num-layers', 'parameterType': 'int'}, {'feasibleSpace': {'list': ['sgd', 'adam', 'ftrl']}, 'name': 'optimizer', 'parameterType': 'categorical'}], 'resumePolicy': 'LongRunning', 'trialTemplate': {'trialParameters': [{'description': 'Learning rate for the training model', 'name': 'learningRate', 'reference': 'lr'}, {'description': 'Number of training model layers', 'name': 'numberLayers', 'reference': 'num-layers'}, {'description': 'Training model optimizer (sdg, adam or ftrl)', 'name': 'optimizer', 'reference': 'optimizer'}], 'trialSpec': {'apiVersion': 'batch/v1', 'kind': 'Job', 'spec': {'template': {'spec': {'containers': [{'command': ['python3', '/opt/mxnet-mnist/mnist.py', '--batch-size=64', '--lr=${trialParameters.learningRate}', '--num-layers=${trialParameters.numberLayers}', 
'--optimizer=${trialParameters.optimizer}'], 'image': 'docker.io/kubeflowkatib/mxnet-mnist:v1beta1-e294a90', 'name': 'training-container'}], 'restartPolicy': 'Never'}}}}}}, 'status': {'conditions': [{'lastTransitionTime': '2020-09-14T23:15:48Z', 'lastUpdateTime': '2020-09-14T23:15:48Z', 'message': 'Experiment is created', 'reason': 'ExperimentCreated', 'status': 'True', 'type': 'Created'}, {'lastTransitionTime': '2020-09-14T23:16:22Z', 'lastUpdateTime': '2020-09-14T23:16:22Z', 'message': 'Experiment is running', 'reason': 'ExperimentRunning', 'status': 'True', 'type': 'Running'}], 'currentOptimalTrial': {'bestTrialName': '', 'observation': {'metrics': None}, 'parameterAssignments': None}, 'runningTrialList': ['cmaes-example-b6pbtrm8', 'cmaes-example-j9rlhrfc', 'cmaes-example-vhjxdbfx'], 'startTime': '2020-09-14T23:15:48Z', 'trials': 3, 'trialsRunning': 3}}\n", + "{'apiVersion': 'kubeflow.org/v1beta1', 'kind': 'Experiment', 'metadata': {'creationTimestamp': '2020-11-30T19:02:20Z', 'finalizers': ['update-prometheus-metrics'], 'generation': 1, 'name': 'cmaes-example', 'namespace': 'anonymous', 'resourceVersion': '170779219', 'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/anonymous/experiments/cmaes-example', 'uid': '6d8b16d3-3778-4fc1-ba3d-d524ec487450'}, 'spec': {'algorithm': {'algorithmName': 'cmaes'}, 'maxFailedTrialCount': 3, 'maxTrialCount': 7, 'metricsCollectorSpec': {'collector': {'kind': 'StdOut'}}, 'objective': {'additionalMetricNames': ['Train-accuracy'], 'goal': 0.99, 'metricStrategies': [{'name': 'Validation-accuracy', 'value': 'max'}, {'name': 'Train-accuracy', 'value': 'max'}], 'objectiveMetricName': 'Validation-accuracy', 'type': 'maximize'}, 'parallelTrialCount': 3, 'parameters': [{'feasibleSpace': {'max': '0.06', 'min': '0.01'}, 'name': 'lr', 'parameterType': 'double'}, {'feasibleSpace': {'max': '5', 'min': '2'}, 'name': 'num-layers', 'parameterType': 'int'}, {'feasibleSpace': {'list': ['sgd', 'adam', 'ftrl']}, 'name': 'optimizer', 
'parameterType': 'categorical'}], 'resumePolicy': 'LongRunning', 'trialTemplate': {'failureCondition': 'status.conditions.#(type==\"Failed\")#|#(status==\"True\")#', 'primaryContainerName': 'training-container', 'successCondition': 'status.conditions.#(type==\"Complete\")#|#(status==\"True\")#', 'trialParameters': [{'description': 'Learning rate for the training model', 'name': 'learningRate', 'reference': 'lr'}, {'description': 'Number of training model layers', 'name': 'numberLayers', 'reference': 'num-layers'}, {'description': 'Training model optimizer (sdg, adam or ftrl)', 'name': 'optimizer', 'reference': 'optimizer'}], 'trialSpec': {'apiVersion': 'batch/v1', 'kind': 'Job', 'spec': {'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'}}, 'spec': {'containers': [{'command': ['python3', '/opt/mxnet-mnist/mnist.py', '--batch-size=64', '--lr=${trialParameters.learningRate}', '--num-layers=${trialParameters.numberLayers}', '--optimizer=${trialParameters.optimizer}'], 'image': 'docker.io/kubeflowkatib/mxnet-mnist:v1beta1-91e4996', 'name': 'training-container'}], 'restartPolicy': 'Never'}}}}}}, 'status': {'completionTime': None, 'conditions': [{'lastTransitionTime': '2020-11-30T19:02:20Z', 'lastUpdateTime': '2020-11-30T19:02:20Z', 'message': 'Experiment is created', 'reason': 'ExperimentCreated', 'status': 'True', 'type': 'Created'}], 'currentOptimalTrial': {'bestTrialName': '', 'observation': {'metrics': None}, 'parameterAssignments': None}, 'startTime': '2020-11-30T19:02:20Z'}}\n", "-----------------\n", "\n", "7\n", - "{'lastTransitionTime': '2020-09-14T23:16:22Z', 'lastUpdateTime': '2020-09-14T23:16:22Z', 'message': 'Experiment is running', 'reason': 'ExperimentRunning', 'status': 'True', 'type': 'Running'}\n" + "{'lastTransitionTime': '2020-11-30T19:02:20Z', 'lastUpdateTime': '2020-11-30T19:02:20Z', 'message': 'Experiment is created', 'reason': 'ExperimentCreated', 'status': 'True', 'type': 'Created'}\n" ] } ], @@ -560,7 +574,7 @@ 
"print(exp)\n", "print(\"-----------------\\n\")\n", "\n", - "# Get max trial count and last status\n", + "# Get the max trial count and latest status.\n", "print(exp[\"spec\"][\"maxTrialCount\"])\n", "print(exp[\"status\"][\"conditions\"][-1])" ] @@ -569,14 +583,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Get all experiments\n", + "# Get all Experiments\n", "\n", - "You can get list of current experiments" + "You can get a list of the current Experiments." ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -590,7 +604,7 @@ } ], "source": [ - "# Get names from running experiments\n", + "# Get names from the running Experiments.\n", "exp_list = kclient.get_experiment(namespace=namespace)\n", "\n", "for exp in exp_list[\"items\"]:\n", @@ -601,14 +615,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Get current experiment status\n", + "# Get the current Experiment status\n", "\n", - "You can check current experiment status" + "You can check the current Experiment status." ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 9, "metadata": { "scrolled": false }, @@ -616,10 +630,10 @@ { "data": { "text/plain": [ - "'Succeeded'" + "'Running'" ] }, - "execution_count": 22, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -632,12 +646,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can check if experiment is succeeded" + "You can check if your Experiment has succeeded." 
] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 10, "metadata": { "scrolled": true }, @@ -645,10 +659,10 @@ { "data": { "text/plain": [ - "True" + "False" ] }, - "execution_count": 23, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -661,14 +675,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# List of current trials\n", + "# List of the current Trials\n", "\n", - "You can get list of current trials with latest status" + "You can get a list of the current Trials with the latest status." ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 11, "metadata": { "scrolled": true }, @@ -676,22 +690,20 @@ { "data": { "text/plain": [ - "[{'name': 'cmaes-example-7jm8qj6m', 'status': 'Succeeded'},\n", - " {'name': 'cmaes-example-b6pbtrm8', 'status': 'Succeeded'},\n", - " {'name': 'cmaes-example-c8f55mvb', 'status': 'Succeeded'},\n", - " {'name': 'cmaes-example-d2l7mwnb', 'status': 'Succeeded'},\n", - " {'name': 'cmaes-example-j9rlhrfc', 'status': 'Succeeded'},\n", - " {'name': 'cmaes-example-mmlvn8sg', 'status': 'Succeeded'},\n", - " {'name': 'cmaes-example-vhjxdbfx', 'status': 'Succeeded'}]" + "[{'name': 'cmaes-example-488pljjb', 'status': 'Running'},\n", + " {'name': 'cmaes-example-bfsszl9p', 'status': 'Succeeded'},\n", + " {'name': 'cmaes-example-cnr8grsw', 'status': 'Succeeded'},\n", + " {'name': 'cmaes-example-tpvpv8wp', 'status': 'Running'},\n", + " {'name': 'cmaes-example-xzvbcn4l', 'status': 'Running'}]" ] }, - "execution_count": 24, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# List trials\n", + "# Trial list.\n", "kclient.list_trials(name=experiment_name, namespace=namespace)" ] }, @@ -699,40 +711,40 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Get optimal HyperParameters\n", + "# Get the optimal HyperParameters\n", "\n", - "You can get current optimal trial from experiment. 
For each metric you can see max, min and latest value." + "You can get the current optimal Trial from your Experiment. For each metric you can see the max, min and latest value." ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'currentOptimalTrial': {'bestTrialName': 'cmaes-example-vhjxdbfx',\n", - " 'observation': {'metrics': [{'latest': '0.980295',\n", - " 'max': '0.980295',\n", - " 'min': '0.963774',\n", + "{'currentOptimalTrial': {'bestTrialName': 'cmaes-example-cnr8grsw',\n", + " 'observation': {'metrics': [{'latest': '0.976015',\n", + " 'max': '0.978802',\n", + " 'min': '0.958798',\n", " 'name': 'Validation-accuracy'},\n", - " {'latest': '0.990988',\n", - " 'max': '0.991654',\n", - " 'min': '0.925773',\n", + " {'latest': '0.992820',\n", + " 'max': '0.992820',\n", + " 'min': '0.920359',\n", " 'name': 'Train-accuracy'}]},\n", " 'parameterAssignments': [{'name': 'lr', 'value': '0.04511033252270099'},\n", " {'name': 'num-layers', 'value': '3'},\n", " {'name': 'optimizer', 'value': 'sgd'}]}}" ] }, - "execution_count": 25, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Optimal HPs\n", + "# Optimal HPs.\n", "kclient.get_optimal_hyperparameters(name=experiment_name, namespace=namespace)" ] }, @@ -740,16 +752,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Status for suggestion objects\n", + "# Status for the Suggestion objects\n", "\n", - "You can check suggestion object status for more information about resume status.\n", + "You can check the Suggestion object status for more information about resume status.\n", "\n", - "For experiment with FromVolume you should be able to check created PVC and PV." + "For Experiment with FromVolume you should be able to check created PVC and PV." 
] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -763,30 +775,30 @@ } ], "source": [ - "# Get never resume experiment's suggestion status\n", + "# Get the current Suggestion status for the never resume Experiment.\n", "suggestion = kclient.get_suggestion(name=experiment_never_resume_name, namespace=namespace)\n", "\n", "print(suggestion[\"status\"][\"conditions\"][-1][\"message\"])\n", "print(\"-----------------\")\n", "\n", - "# Get from volume resume experiment's suggestion status\n", + "# Get the current Suggestion status for the from volume Experiment.\n", "suggestion = kclient.get_suggestion(name=experiment_from_volume_resume_name, namespace=namespace)\n", "\n", - "print(suggestion[\"status\"][\"conditions\"][-1][\"message\"])\n" + "print(suggestion[\"status\"][\"conditions\"][-1][\"message\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Delete experiments\n", + "# Delete your Experiments\n", "\n", - "You can delete experiments" + "You can delete your Experiments." 
] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -794,16 +806,16 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1beta1',\n", " 'kind': 'Experiment',\n", - " 'metadata': {'creationTimestamp': '2020-09-14T23:16:04Z',\n", + " 'metadata': {'creationTimestamp': '2020-11-30T19:02:29Z',\n", " 'deletionGracePeriodSeconds': 0,\n", - " 'deletionTimestamp': '2020-09-14T23:33:24Z',\n", + " 'deletionTimestamp': '2020-11-30T19:20:53Z',\n", " 'finalizers': ['update-prometheus-metrics'],\n", " 'generation': 2,\n", " 'name': 'from-volume-resume-cmaes',\n", " 'namespace': 'anonymous',\n", - " 'resourceVersion': '127110528',\n", + " 'resourceVersion': '170787823',\n", " 'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/anonymous/experiments/from-volume-resume-cmaes',\n", - " 'uid': '65173495-e76b-4136-88fa-a688790150cd'},\n", + " 'uid': '20f3cee6-818d-48ff-ad1e-d2c85cee51f1'},\n", " 'spec': {'algorithm': {'algorithmName': 'cmaes'},\n", " 'maxFailedTrialCount': 3,\n", " 'maxTrialCount': 4,\n", @@ -825,7 +837,10 @@ " 'name': 'optimizer',\n", " 'parameterType': 'categorical'}],\n", " 'resumePolicy': 'FromVolume',\n", - " 'trialTemplate': {'trialParameters': [{'description': 'Learning rate for the training model',\n", + " 'trialTemplate': {'failureCondition': 'status.conditions.#(type==\"Failed\")#|#(status==\"True\")#',\n", + " 'primaryContainerName': 'training-container',\n", + " 'successCondition': 'status.conditions.#(type==\"Complete\")#|#(status==\"True\")#',\n", + " 'trialParameters': [{'description': 'Learning rate for the training model',\n", " 'name': 'learningRate',\n", " 'reference': 'lr'},\n", " {'description': 'Number of training model layers',\n", @@ -836,56 +851,57 @@ " 'reference': 'optimizer'}],\n", " 'trialSpec': {'apiVersion': 'batch/v1',\n", " 'kind': 'Job',\n", - " 'spec': {'template': {'spec': {'containers': [{'command': ['python3',\n", + " 'spec': {'template': {'metadata': {'annotations': 
{'sidecar.istio.io/inject': 'false'}},\n", + " 'spec': {'containers': [{'command': ['python3',\n", " '/opt/mxnet-mnist/mnist.py',\n", " '--batch-size=64',\n", " '--lr=${trialParameters.learningRate}',\n", " '--num-layers=${trialParameters.numberLayers}',\n", " '--optimizer=${trialParameters.optimizer}'],\n", - " 'image': 'docker.io/kubeflowkatib/mxnet-mnist:v1beta1-e294a90',\n", + " 'image': 'docker.io/kubeflowkatib/mxnet-mnist:v1beta1-91e4996',\n", " 'name': 'training-container'}],\n", " 'restartPolicy': 'Never'}}}}}},\n", - " 'status': {'completionTime': '2020-09-14T23:25:43Z',\n", - " 'conditions': [{'lastTransitionTime': '2020-09-14T23:16:04Z',\n", - " 'lastUpdateTime': '2020-09-14T23:16:04Z',\n", + " 'status': {'completionTime': '2020-11-30T19:19:54Z',\n", + " 'conditions': [{'lastTransitionTime': '2020-11-30T19:02:29Z',\n", + " 'lastUpdateTime': '2020-11-30T19:02:29Z',\n", " 'message': 'Experiment is created',\n", " 'reason': 'ExperimentCreated',\n", " 'status': 'True',\n", " 'type': 'Created'},\n", - " {'lastTransitionTime': '2020-09-14T23:25:43Z',\n", - " 'lastUpdateTime': '2020-09-14T23:25:43Z',\n", + " {'lastTransitionTime': '2020-11-30T19:19:54Z',\n", + " 'lastUpdateTime': '2020-11-30T19:19:54Z',\n", " 'message': 'Experiment is running',\n", " 'reason': 'ExperimentRunning',\n", " 'status': 'False',\n", " 'type': 'Running'},\n", - " {'lastTransitionTime': '2020-09-14T23:25:43Z',\n", - " 'lastUpdateTime': '2020-09-14T23:25:43Z',\n", + " {'lastTransitionTime': '2020-11-30T19:19:54Z',\n", + " 'lastUpdateTime': '2020-11-30T19:19:54Z',\n", " 'message': 'Experiment has succeeded because max trial count has reached',\n", " 'reason': 'ExperimentMaxTrialsReached',\n", " 'status': 'True',\n", " 'type': 'Succeeded'}],\n", - " 'currentOptimalTrial': {'bestTrialName': 'from-volume-resume-cmaes-kc7qzd9r',\n", - " 'observation': {'metrics': [{'latest': '0.974224',\n", - " 'max': '0.975916',\n", - " 'min': '0.958997',\n", + " 'currentOptimalTrial': {'bestTrialName': 
'from-volume-resume-cmaes-m79z545s',\n", + " 'observation': {'metrics': [{'latest': '0.978702',\n", + " 'max': '0.978702',\n", + " 'min': '0.956708',\n", " 'name': 'Validation-accuracy'},\n", - " {'latest': '0.992054',\n", - " 'max': '0.992054',\n", - " 'min': '0.922508',\n", + " {'latest': '0.991055',\n", + " 'max': '0.991055',\n", + " 'min': '0.922725',\n", " 'name': 'Train-accuracy'}]},\n", " 'parameterAssignments': [{'name': 'lr', 'value': '0.04511033252270099'},\n", " {'name': 'num-layers', 'value': '3'},\n", " {'name': 'optimizer', 'value': 'sgd'}]},\n", - " 'startTime': '2020-09-14T23:16:04Z',\n", - " 'succeededTrialList': ['from-volume-resume-cmaes-69mnvsbp',\n", - " 'from-volume-resume-cmaes-8rc2xnjs',\n", - " 'from-volume-resume-cmaes-j97sfz7r',\n", - " 'from-volume-resume-cmaes-kc7qzd9r'],\n", + " 'startTime': '2020-11-30T19:02:29Z',\n", + " 'succeededTrialList': ['from-volume-resume-cmaes-668kbxz6',\n", + " 'from-volume-resume-cmaes-985qjvlb',\n", + " 'from-volume-resume-cmaes-m79z545s',\n", + " 'from-volume-resume-cmaes-wl692bzb'],\n", " 'trials': 4,\n", " 'trialsSucceeded': 4}}" ] }, - "execution_count": 29, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -895,6 +911,13 @@ "kclient.delete_experiment(name=experiment_never_resume_name, namespace=namespace)\n", "kclient.delete_experiment(name=experiment_from_volume_resume_name, namespace=namespace)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/sdk/python/v1beta1/examples/nas-with-darts.ipynb b/sdk/python/v1beta1/examples/nas-with-darts.ipynb index 814b68881fa..b09b282ded7 100644 --- a/sdk/python/v1beta1/examples/nas-with-darts.ipynb +++ b/sdk/python/v1beta1/examples/nas-with-darts.ipynb @@ -6,11 +6,11 @@ "source": [ "# Neural Architecture Search with DARTS\n", "\n", - "In this example you will deploy Katib experiment with Differentiable Architecture Search (DARTS) algorithm 
using Jupyter Notebook and Katib SDK. Your Kubernetes cluster must have at least one GPU for this example.\n", + "In this example you will deploy a Katib Experiment with the Differentiable Architecture Search (DARTS) algorithm using Jupyter Notebook and the Katib SDK. Your Kubernetes cluster must have at least one GPU for this example.\n", "\n", "You can read more about how we use DARTS in Katib [here](https://github.com/kubeflow/katib/tree/master/pkg/suggestion/v1beta1/nas/darts).\n", "\n", - "The notebook shows how to create, get, check status and delete experiment." + "The notebook shows how to create, get, check status and delete an Experiment." ] }, { @@ -32,40 +32,40 @@ "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", - "Collecting kubeflow-katib\n", - " Downloading kubeflow_katib-0.0.5-py3-none-any.whl (112 kB)\n", - "\u001b[K |████████████████████████████████| 112 kB 18.7 MB/s eta 0:00:01\n", - "\u001b[?25hRequirement already satisfied: urllib3>=1.15.1 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (1.25.8)\n", - "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (2.8.1)\n", + "Collecting kubeflow-katib==0.10.1\n", + " Downloading kubeflow_katib-0.10.1-py3-none-any.whl (113 kB)\n", + "\u001b[K |████████████████████████████████| 113 kB 24.5 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) (2.8.1)\n", + "Requirement already satisfied: kubernetes==10.0.1 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) (10.0.1)\n", + "Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) (2019.11.28)\n", + "Requirement already satisfied: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) 
(45.1.0)\n", + "Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kubeflow-katib==0.10.1) (1.11.0)\n", + "Requirement already satisfied: urllib3>=1.15.1 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib==0.10.1) (1.25.8)\n", "Collecting table-logger>=0.3.5\n", " Downloading table_logger-0.3.6-py3-none-any.whl (14 kB)\n", - "Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (2019.11.28)\n", - "Requirement already satisfied: kubernetes==10.0.1 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (10.0.1)\n", - "Requirement already satisfied: setuptools>=21.0.0 in /usr/local/lib/python3.6/dist-packages (from kubeflow-katib) (45.1.0)\n", - "Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kubeflow-katib) (1.11.0)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from table-logger>=0.3.5->kubeflow-katib) (1.18.1)\n", - "Requirement already satisfied: pyyaml>=3.12 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (5.3)\n", - "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (0.57.0)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (2.22.0)\n", - "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (1.11.0)\n", - "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib) (1.3.0)\n", - "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kubernetes==10.0.1->kubeflow-katib) (3.0.4)\n", - "Requirement already satisfied: idna<2.9,>=2.5 in 
/usr/lib/python3/dist-packages (from requests->kubernetes==10.0.1->kubeflow-katib) (2.6)\n", - "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib) (4.0.0)\n", - "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib) (4.0)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib) (0.2.8)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes==10.0.1->kubeflow-katib) (3.1.0)\n", - "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib) (0.4.8)\n", + "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib==0.10.1) (0.57.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib==0.10.1) (2.22.0)\n", + "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib==0.10.1) (1.3.0)\n", + "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib==0.10.1) (1.11.0)\n", + "Requirement already satisfied: pyyaml>=3.12 in /usr/local/lib/python3.6/dist-packages (from kubernetes==10.0.1->kubeflow-katib==0.10.1) (5.3)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from table-logger>=0.3.5->kubeflow-katib==0.10.1) (1.18.1)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from 
requests->kubernetes==10.0.1->kubeflow-katib==0.10.1) (3.0.4)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->kubernetes==10.0.1->kubeflow-katib==0.10.1) (2.6)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib->kubernetes==10.0.1->kubeflow-katib==0.10.1) (3.1.0)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib==0.10.1) (4.0.0)\n", + "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib==0.10.1) (4.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib==0.10.1) (0.2.8)\n", + "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth>=1.0.1->kubernetes==10.0.1->kubeflow-katib==0.10.1) (0.4.8)\n", "Installing collected packages: table-logger, kubeflow-katib\n", - "Successfully installed kubeflow-katib-0.0.5 table-logger-0.3.6\n", - "\u001b[33mWARNING: You are using pip version 20.0.2; however, version 20.2.3 is available.\n", + "Successfully installed kubeflow-katib-0.10.1 table-logger-0.3.6\n", + "\u001b[33mWARNING: You are using pip version 20.0.2; however, version 20.3 is available.\n", "You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ - "pip install kubeflow-katib" + "pip install kubeflow-katib==0.10.1" ] }, { @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -132,11 +132,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 
Define experiment\n", + "## Define your Experiment\n", "\n", - "You have to create experiment object before deploying it. This experiment is similar to [this](https://github.com/kubeflow/katib/blob/master/examples/v1beta1/nas/darts-example-gpu.yaml) example.\n", + "You have to create your Experiment object before deploying it. This Experiment is similar to [this](https://github.com/kubeflow/katib/blob/master/examples/v1beta1/nas/darts-example-gpu.yaml) example.\n", "\n", - "You can read more about DARTS algorithm settings [here](https://www.kubeflow.org/docs/components/hyperparameter-tuning/experiment/#differentiable-architecture-search-darts)." + "You can read more about DARTS algorithm settings [here](https://www.kubeflow.org/docs/components/katib/experiment/#differentiable-architecture-search-darts)." ] }, { @@ -145,7 +145,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Experiment metadata\n", + "# Experiment name and namespace.\n", "namespace = \"anonymous\"\n", "experiment_name = \"darts-example\"\n", "\n", @@ -155,7 +155,7 @@ ")\n", "\n", "\n", - "# Algorithm specification\n", + "# Algorithm specification.\n", "algorithm_spec=V1beta1AlgorithmSpec(\n", " algorithm_name=\"darts\",\n", " algorithm_settings=[\n", @@ -200,7 +200,7 @@ " )\n", ")\n", "\n", - "# Configuration for Neural Network (NN)\n", + "# Configuration for the Neural Network (NN).\n", "# This NN contains 2 number of layers and 5 various operations with different parameters.\n", "nas_config=V1beta1NasConfig(\n", " graph_config=V1beta1GraphConfig(\n", @@ -262,17 +262,22 @@ ")\n", "\n", "\n", - "# JSON trial template specification\n", + "# JSON template specification for the Trial's Worker Kubernetes Job.\n", "trial_spec={\n", " \"apiVersion\": \"batch/v1\",\n", " \"kind\": \"Job\",\n", " \"spec\": {\n", " \"template\": {\n", + " \"metadata\": {\n", + " \"annotations\": {\n", + " \"sidecar.istio.io/inject\": \"false\"\n", + " }\n", + " },\n", " \"spec\": {\n", " \"containers\": [\n", " {\n", " 
\"name\": \"training-container\",\n", - " \"image\": \"docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-e294a90\",\n", + " \"image\": \"docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-91e4996\",\n", " \"command\": [\n", " 'python3',\n", " 'run_trial.py',\n", @@ -280,7 +285,7 @@ " '--search-space=\"${trialParameters.searchSpace}\"',\n", " '--num-layers=\"${trialParameters.numberLayers}\"'\n", " ],\n", - " # Training container requires 1 GPU\n", + " # Training container requires 1 GPU.\n", " \"resources\": {\n", " \"limits\": {\n", " \"nvidia.com/gpu\": 1\n", @@ -294,10 +299,11 @@ " }\n", "}\n", "\n", - "# Template with trial parameters and trial spec\n", + "# Template with Trial parameters and Trial spec.\n", "# Set retain to True to save trial resources after completion.\n", "trial_template=V1beta1TrialTemplate(\n", " retain=True,\n", + " primary_container_name=\"training-container\",\n", " trial_parameters=[\n", " V1beta1TrialParameterSpec(\n", " name=\"algorithmSettings\",\n", @@ -319,7 +325,7 @@ ")\n", "\n", "\n", - "# Experiment object\n", + "# Experiment object.\n", "experiment = V1beta1Experiment(\n", " api_version=\"kubeflow.org/v1beta1\",\n", " kind=\"Experiment\",\n", @@ -341,7 +347,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can print experiment's info to verify it before submission" + "You can print the Experiment's info to verify it before submission." 
] }, { @@ -353,12 +359,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'name': 'training-container', 'image': 'docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-e294a90', 'command': ['python3', 'run_trial.py', '--algorithm-settings=\"${trialParameters.algorithmSettings}\"', '--search-space=\"${trialParameters.searchSpace}\"', '--num-layers=\"${trialParameters.numberLayers}\"'], 'resources': {'limits': {'nvidia.com/gpu': 1}}}\n" + "{'name': 'training-container', 'image': 'docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-91e4996', 'command': ['python3', 'run_trial.py', '--algorithm-settings=\"${trialParameters.algorithmSettings}\"', '--search-space=\"${trialParameters.searchSpace}\"', '--num-layers=\"${trialParameters.numberLayers}\"'], 'resources': {'limits': {'nvidia.com/gpu': 1}}}\n" ] } ], "source": [ - "# Print trial template container info\n", + "# Print the Trial template container info.\n", "print(experiment.spec.trial_template.trial_spec[\"spec\"][\"template\"][\"spec\"][\"containers\"][0])" ] }, @@ -366,11 +372,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Create experiment\n", + "# Create your Experiment\n", "\n", - "You have to create Katib client to use SDK\n", + "You have to create a Katib client to use the SDK.\n", "\n", - "TODO (andreyvelich): Current experiment link for NAS is incorrect." + "TODO (andreyvelich): Current Experiment link for NAS is incorrect." 
] }, { @@ -395,13 +401,13 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1beta1',\n", " 'kind': 'Experiment',\n", - " 'metadata': {'creationTimestamp': '2020-09-14T23:21:51Z',\n", + " 'metadata': {'creationTimestamp': '2020-11-30T19:32:50Z',\n", " 'generation': 1,\n", " 'name': 'darts-example',\n", " 'namespace': 'anonymous',\n", - " 'resourceVersion': '127105457',\n", + " 'resourceVersion': '170793314',\n", " 'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/anonymous/experiments/darts-example',\n", - " 'uid': 'b6b73ce9-e1a5-4d7f-8569-88da89285671'},\n", + " 'uid': '00cd64b7-9388-4faf-93e2-5667989f723d'},\n", " 'spec': {'algorithm': {'algorithmName': 'darts',\n", " 'algorithmSettings': [{'name': 'num_epochs', 'value': '2'},\n", " {'name': 'stem_multiplier', 'value': '1'},\n", @@ -435,7 +441,10 @@ " 'type': 'maximize'},\n", " 'parallelTrialCount': 1,\n", " 'resumePolicy': 'LongRunning',\n", - " 'trialTemplate': {'retain': True,\n", + " 'trialTemplate': {'failureCondition': 'status.conditions.#(type==\"Failed\")#|#(status==\"True\")#',\n", + " 'primaryContainerName': 'training-container',\n", + " 'retain': True,\n", + " 'successCondition': 'status.conditions.#(type==\"Complete\")#|#(status==\"True\")#',\n", " 'trialParameters': [{'description': ' Algorithm settings of DARTS Experiment',\n", " 'name': 'algorithmSettings',\n", " 'reference': 'algorithm-settings'},\n", @@ -447,12 +456,13 @@ " 'reference': 'num-layers'}],\n", " 'trialSpec': {'apiVersion': 'batch/v1',\n", " 'kind': 'Job',\n", - " 'spec': {'template': {'spec': {'containers': [{'command': ['python3',\n", + " 'spec': {'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'}},\n", + " 'spec': {'containers': [{'command': ['python3',\n", " 'run_trial.py',\n", " '--algorithm-settings=\"${trialParameters.algorithmSettings}\"',\n", " '--search-space=\"${trialParameters.searchSpace}\"',\n", " '--num-layers=\"${trialParameters.numberLayers}\"'],\n", - " 'image': 
'docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-e294a90',\n", + " 'image': 'docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-91e4996',\n", " 'name': 'training-container',\n", " 'resources': {'limits': {'nvidia.com/gpu': 1}}}],\n", " 'restartPolicy': 'Never'}}}}}}}" @@ -464,10 +474,10 @@ } ], "source": [ - "# Create client\n", + "# Create client.\n", "kclient = KatibClient()\n", "\n", - "# Create experiment\n", + "# Create your Experiment.\n", "kclient.create_experiment(experiment,namespace=namespace)" ] }, @@ -475,24 +485,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Get experiment\n", + "# Get your Experiment\n", "\n", - "You can get experiment by name and receive required data" + "You can get your Experiment by name and receive required data." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'apiVersion': 'kubeflow.org/v1beta1', 'kind': 'Experiment', 'metadata': {'creationTimestamp': '2020-09-14T23:21:51Z', 'finalizers': ['update-prometheus-metrics'], 'generation': 1, 'name': 'darts-example', 'namespace': 'anonymous', 'resourceVersion': '127105807', 'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/anonymous/experiments/darts-example', 'uid': 'b6b73ce9-e1a5-4d7f-8569-88da89285671'}, 'spec': {'algorithm': {'algorithmName': 'darts', 'algorithmSettings': [{'name': 'num_epochs', 'value': '2'}, {'name': 'stem_multiplier', 'value': '1'}, {'name': 'init_channels', 'value': '4'}, {'name': 'num_nodes', 'value': '3'}]}, 'maxFailedTrialCount': 1, 'maxTrialCount': 1, 'metricsCollectorSpec': {'collector': {'kind': 'StdOut'}, 'source': {'filter': {'metricsFormat': ['([\\\\w-]+)=(Genotype.*)']}}}, 'nasConfig': {'graphConfig': {'numLayers': 2}, 'operations': [{'operationType': 'separable_convolution', 'parameters': [{'feasibleSpace': {'list': ['3']}, 'name': 'filter_size', 'parameterType': 'categorical'}]}, {'operationType': 
'dilated_convolution', 'parameters': [{'feasibleSpace': {'list': ['3', '5']}, 'name': 'filter_size', 'parameterType': 'categorical'}]}, {'operationType': 'avg_pooling', 'parameters': [{'feasibleSpace': {'list': ['3']}, 'name': 'filter_size', 'parameterType': 'categorical'}]}, {'operationType': 'max_pooling', 'parameters': [{'feasibleSpace': {'list': ['3']}, 'name': 'filter_size', 'parameterType': 'categorical'}]}, {'operationType': 'skip_connection'}]}, 'objective': {'metricStrategies': [{'name': 'Best-Genotype', 'value': 'max'}], 'objectiveMetricName': 'Best-Genotype', 'type': 'maximize'}, 'parallelTrialCount': 1, 'resumePolicy': 'LongRunning', 'trialTemplate': {'retain': True, 'trialParameters': [{'description': ' Algorithm settings of DARTS Experiment', 'name': 'algorithmSettings', 'reference': 'algorithm-settings'}, {'description': 'Search Space of DARTS Experiment', 'name': 'searchSpace', 'reference': 'search-space'}, {'description': 'Number of Neural Network layers', 'name': 'numberLayers', 'reference': 'num-layers'}], 'trialSpec': {'apiVersion': 'batch/v1', 'kind': 'Job', 'spec': {'template': {'spec': {'containers': [{'command': ['python3', 'run_trial.py', '--algorithm-settings=\"${trialParameters.algorithmSettings}\"', '--search-space=\"${trialParameters.searchSpace}\"', '--num-layers=\"${trialParameters.numberLayers}\"'], 'image': 'docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-e294a90', 'name': 'training-container', 'resources': {'limits': {'nvidia.com/gpu': 1}}}], 'restartPolicy': 'Never'}}}}}}, 'status': {'conditions': [{'lastTransitionTime': '2020-09-14T23:21:51Z', 'lastUpdateTime': '2020-09-14T23:21:51Z', 'message': 'Experiment is created', 'reason': 'ExperimentCreated', 'status': 'True', 'type': 'Created'}, {'lastTransitionTime': '2020-09-14T23:22:36Z', 'lastUpdateTime': '2020-09-14T23:22:36Z', 'message': 'Experiment is running', 'reason': 'ExperimentRunning', 'status': 'True', 'type': 'Running'}], 'currentOptimalTrial': {'bestTrialName': '', 
'observation': {'metrics': None}, 'parameterAssignments': None}, 'runningTrialList': ['darts-example-pq4jjhgs'], 'startTime': '2020-09-14T23:21:51Z', 'trials': 1, 'trialsRunning': 1}}\n", + "{'apiVersion': 'kubeflow.org/v1beta1', 'kind': 'Experiment', 'metadata': {'creationTimestamp': '2020-11-30T19:32:50Z', 'finalizers': ['update-prometheus-metrics'], 'generation': 1, 'name': 'darts-example', 'namespace': 'anonymous', 'resourceVersion': '170793467', 'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/anonymous/experiments/darts-example', 'uid': '00cd64b7-9388-4faf-93e2-5667989f723d'}, 'spec': {'algorithm': {'algorithmName': 'darts', 'algorithmSettings': [{'name': 'num_epochs', 'value': '2'}, {'name': 'stem_multiplier', 'value': '1'}, {'name': 'init_channels', 'value': '4'}, {'name': 'num_nodes', 'value': '3'}]}, 'maxFailedTrialCount': 1, 'maxTrialCount': 1, 'metricsCollectorSpec': {'collector': {'kind': 'StdOut'}, 'source': {'filter': {'metricsFormat': ['([\\\\w-]+)=(Genotype.*)']}}}, 'nasConfig': {'graphConfig': {'numLayers': 2}, 'operations': [{'operationType': 'separable_convolution', 'parameters': [{'feasibleSpace': {'list': ['3']}, 'name': 'filter_size', 'parameterType': 'categorical'}]}, {'operationType': 'dilated_convolution', 'parameters': [{'feasibleSpace': {'list': ['3', '5']}, 'name': 'filter_size', 'parameterType': 'categorical'}]}, {'operationType': 'avg_pooling', 'parameters': [{'feasibleSpace': {'list': ['3']}, 'name': 'filter_size', 'parameterType': 'categorical'}]}, {'operationType': 'max_pooling', 'parameters': [{'feasibleSpace': {'list': ['3']}, 'name': 'filter_size', 'parameterType': 'categorical'}]}, {'operationType': 'skip_connection'}]}, 'objective': {'metricStrategies': [{'name': 'Best-Genotype', 'value': 'max'}], 'objectiveMetricName': 'Best-Genotype', 'type': 'maximize'}, 'parallelTrialCount': 1, 'resumePolicy': 'LongRunning', 'trialTemplate': {'failureCondition': 'status.conditions.#(type==\"Failed\")#|#(status==\"True\")#', 
'primaryContainerName': 'training-container', 'retain': True, 'successCondition': 'status.conditions.#(type==\"Complete\")#|#(status==\"True\")#', 'trialParameters': [{'description': ' Algorithm settings of DARTS Experiment', 'name': 'algorithmSettings', 'reference': 'algorithm-settings'}, {'description': 'Search Space of DARTS Experiment', 'name': 'searchSpace', 'reference': 'search-space'}, {'description': 'Number of Neural Network layers', 'name': 'numberLayers', 'reference': 'num-layers'}], 'trialSpec': {'apiVersion': 'batch/v1', 'kind': 'Job', 'spec': {'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'}}, 'spec': {'containers': [{'command': ['python3', 'run_trial.py', '--algorithm-settings=\"${trialParameters.algorithmSettings}\"', '--search-space=\"${trialParameters.searchSpace}\"', '--num-layers=\"${trialParameters.numberLayers}\"'], 'image': 'docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-91e4996', 'name': 'training-container', 'resources': {'limits': {'nvidia.com/gpu': 1}}}], 'restartPolicy': 'Never'}}}}}}, 'status': {'conditions': [{'lastTransitionTime': '2020-11-30T19:32:50Z', 'lastUpdateTime': '2020-11-30T19:32:50Z', 'message': 'Experiment is created', 'reason': 'ExperimentCreated', 'status': 'True', 'type': 'Created'}, {'lastTransitionTime': '2020-11-30T19:33:07Z', 'lastUpdateTime': '2020-11-30T19:33:07Z', 'message': 'Experiment is running', 'reason': 'ExperimentRunning', 'status': 'True', 'type': 'Running'}], 'currentOptimalTrial': {'bestTrialName': '', 'observation': {'metrics': None}, 'parameterAssignments': None}, 'runningTrialList': ['darts-example-t8qb74sn'], 'startTime': '2020-11-30T19:32:50Z', 'trials': 1, 'trialsRunning': 1}}\n", "-----------------\n", "\n", - "{'lastTransitionTime': '2020-09-14T23:22:36Z', 'lastUpdateTime': '2020-09-14T23:22:36Z', 'message': 'Experiment is running', 'reason': 'ExperimentRunning', 'status': 'True', 'type': 'Running'}\n" + "{'lastTransitionTime': '2020-11-30T19:33:07Z', 
'lastUpdateTime': '2020-11-30T19:33:07Z', 'message': 'Experiment is running', 'reason': 'ExperimentRunning', 'status': 'True', 'type': 'Running'}\n" ] } ], @@ -501,7 +511,7 @@ "print(exp)\n", "print(\"-----------------\\n\")\n", "\n", - "# Get last status\n", + "# Get the latest status.\n", "print(exp[\"status\"][\"conditions\"][-1])" ] }, @@ -509,14 +519,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Get current experiment status\n", + "# Get the current Experiment status\n", "\n", - "You can check current experiment status" + "You can check the current Experiment status." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": { "scrolled": true }, @@ -527,7 +537,7 @@ "'Succeeded'" ] }, - "execution_count": 9, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -540,12 +550,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can check if experiment is succeeded" + "You can check if your Experiment has succeeded." ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -554,7 +564,7 @@ "True" ] }, - "execution_count": 10, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -567,23 +577,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Get best Genotype\n", + "# Get the best Genotype\n", "\n", - "Best Genotype is located in optimal trial currently. Latest Genotype is the best.\n", + "The best Genotype is currently located in the optimal Trial. The latest Genotype is the best.\n", "\n", - "Check trial logs to get more information about training process." + "Check your Trial logs to get more information about the training process." 
] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Genotype(normal=[[('separable_convolution_3x3',0),('dilated_convolution_3x3',1)],[('dilated_convolution_5x5',1),('dilated_convolution_3x3',2)],[('dilated_convolution_5x5',2),('separable_convolution_3x3',3)]],normal_concat=range(2,5),reduce=[[('separable_convolution_3x3',1),('separable_convolution_3x3',0)],[('max_pooling_3x3',2),('max_pooling_3x3',1)],[('max_pooling_3x3',2),('max_pooling_3x3',3)]],reduce_concat=range(2,5))\n" + "Genotype(normal=[[('separable_convolution_3x3',0),('dilated_convolution_3x3',1)],[('dilated_convolution_3x3',2),('dilated_convolution_5x5',1)],[('dilated_convolution_5x5',2),('separable_convolution_3x3',3)]],normal_concat=range(2,5),reduce=[[('separable_convolution_3x3',1),('separable_convolution_3x3',0)],[('max_pooling_3x3',2),('max_pooling_3x3',1)],[('max_pooling_3x3',2),('max_pooling_3x3',3)]],reduce_concat=range(2,5))\n" ] } ], @@ -598,14 +608,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Delete experiments\n", + "# Delete your Experiments\n", "\n", - "You can delete experiments" + "You can delete your Experiments." 
] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "metadata": { "scrolled": false }, @@ -615,16 +625,16 @@ "text/plain": [ "{'apiVersion': 'kubeflow.org/v1beta1',\n", " 'kind': 'Experiment',\n", - " 'metadata': {'creationTimestamp': '2020-09-14T23:21:51Z',\n", + " 'metadata': {'creationTimestamp': '2020-11-30T19:32:50Z',\n", " 'deletionGracePeriodSeconds': 0,\n", - " 'deletionTimestamp': '2020-09-14T23:27:32Z',\n", + " 'deletionTimestamp': '2020-11-30T19:36:58Z',\n", " 'finalizers': ['update-prometheus-metrics'],\n", " 'generation': 2,\n", " 'name': 'darts-example',\n", " 'namespace': 'anonymous',\n", - " 'resourceVersion': '127108134',\n", + " 'resourceVersion': '170795247',\n", " 'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/anonymous/experiments/darts-example',\n", - " 'uid': 'b6b73ce9-e1a5-4d7f-8569-88da89285671'},\n", + " 'uid': '00cd64b7-9388-4faf-93e2-5667989f723d'},\n", " 'spec': {'algorithm': {'algorithmName': 'darts',\n", " 'algorithmSettings': [{'name': 'num_epochs', 'value': '2'},\n", " {'name': 'stem_multiplier', 'value': '1'},\n", @@ -658,7 +668,10 @@ " 'type': 'maximize'},\n", " 'parallelTrialCount': 1,\n", " 'resumePolicy': 'LongRunning',\n", - " 'trialTemplate': {'retain': True,\n", + " 'trialTemplate': {'failureCondition': 'status.conditions.#(type==\"Failed\")#|#(status==\"True\")#',\n", + " 'primaryContainerName': 'training-container',\n", + " 'retain': True,\n", + " 'successCondition': 'status.conditions.#(type==\"Complete\")#|#(status==\"True\")#',\n", " 'trialParameters': [{'description': ' Algorithm settings of DARTS Experiment',\n", " 'name': 'algorithmSettings',\n", " 'reference': 'algorithm-settings'},\n", @@ -670,36 +683,37 @@ " 'reference': 'num-layers'}],\n", " 'trialSpec': {'apiVersion': 'batch/v1',\n", " 'kind': 'Job',\n", - " 'spec': {'template': {'spec': {'containers': [{'command': ['python3',\n", + " 'spec': {'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'}},\n", + " 
'spec': {'containers': [{'command': ['python3',\n", " 'run_trial.py',\n", " '--algorithm-settings=\"${trialParameters.algorithmSettings}\"',\n", " '--search-space=\"${trialParameters.searchSpace}\"',\n", " '--num-layers=\"${trialParameters.numberLayers}\"'],\n", - " 'image': 'docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-e294a90',\n", + " 'image': 'docker.io/kubeflowkatib/darts-cnn-cifar10:v1beta1-91e4996',\n", " 'name': 'training-container',\n", " 'resources': {'limits': {'nvidia.com/gpu': 1}}}],\n", " 'restartPolicy': 'Never'}}}}}},\n", - " 'status': {'completionTime': '2020-09-14T23:25:53Z',\n", - " 'conditions': [{'lastTransitionTime': '2020-09-14T23:21:51Z',\n", - " 'lastUpdateTime': '2020-09-14T23:21:51Z',\n", + " 'status': {'completionTime': '2020-11-30T19:36:29Z',\n", + " 'conditions': [{'lastTransitionTime': '2020-11-30T19:32:50Z',\n", + " 'lastUpdateTime': '2020-11-30T19:32:50Z',\n", " 'message': 'Experiment is created',\n", " 'reason': 'ExperimentCreated',\n", " 'status': 'True',\n", " 'type': 'Created'},\n", - " {'lastTransitionTime': '2020-09-14T23:25:53Z',\n", - " 'lastUpdateTime': '2020-09-14T23:25:53Z',\n", + " {'lastTransitionTime': '2020-11-30T19:36:29Z',\n", + " 'lastUpdateTime': '2020-11-30T19:36:29Z',\n", " 'message': 'Experiment is running',\n", " 'reason': 'ExperimentRunning',\n", " 'status': 'False',\n", " 'type': 'Running'},\n", - " {'lastTransitionTime': '2020-09-14T23:25:53Z',\n", - " 'lastUpdateTime': '2020-09-14T23:25:53Z',\n", + " {'lastTransitionTime': '2020-11-30T19:36:29Z',\n", + " 'lastUpdateTime': '2020-11-30T19:36:29Z',\n", " 'message': 'Experiment has succeeded because max trial count has reached',\n", " 'reason': 'ExperimentMaxTrialsReached',\n", " 'status': 'True',\n", " 'type': 'Succeeded'}],\n", - " 'currentOptimalTrial': {'bestTrialName': 'darts-example-pq4jjhgs',\n", - " 'observation': {'metrics': [{'latest': 
\"Genotype(normal=[[('separable_convolution_3x3',0),('dilated_convolution_3x3',1)],[('dilated_convolution_5x5',1),('dilated_convolution_3x3',2)],[('dilated_convolution_5x5',2),('separable_convolution_3x3',3)]],normal_concat=range(2,5),reduce=[[('separable_convolution_3x3',1),('separable_convolution_3x3',0)],[('max_pooling_3x3',2),('max_pooling_3x3',1)],[('max_pooling_3x3',2),('max_pooling_3x3',3)]],reduce_concat=range(2,5))\",\n", + " 'currentOptimalTrial': {'bestTrialName': 'darts-example-t8qb74sn',\n", + " 'observation': {'metrics': [{'latest': \"Genotype(normal=[[('separable_convolution_3x3',0),('dilated_convolution_3x3',1)],[('dilated_convolution_3x3',2),('dilated_convolution_5x5',1)],[('dilated_convolution_5x5',2),('separable_convolution_3x3',3)]],normal_concat=range(2,5),reduce=[[('separable_convolution_3x3',1),('separable_convolution_3x3',0)],[('max_pooling_3x3',2),('max_pooling_3x3',1)],[('max_pooling_3x3',2),('max_pooling_3x3',3)]],reduce_concat=range(2,5))\",\n", " 'max': 'unavailable',\n", " 'min': 'unavailable',\n", " 'name': 'Best-Genotype'}]},\n", @@ -708,13 +722,13 @@ " {'name': 'search-space',\n", " 'value': \"['separable_convolution_3x3', 'dilated_convolution_3x3', 'dilated_convolution_5x5', 'avg_pooling_3x3', 'max_pooling_3x3', 'skip_connection']\"},\n", " {'name': 'num-layers', 'value': '2'}]},\n", - " 'startTime': '2020-09-14T23:21:51Z',\n", - " 'succeededTrialList': ['darts-example-pq4jjhgs'],\n", + " 'startTime': '2020-11-30T19:32:50Z',\n", + " 'succeededTrialList': ['darts-example-t8qb74sn'],\n", " 'trials': 1,\n", " 'trialsSucceeded': 1}}" ] }, - "execution_count": 12, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -722,6 +736,13 @@ "source": [ "kclient.delete_experiment(name=experiment_name, namespace=namespace)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {