# Training Process
This notebook walks through an example of running a training job.

1. Creating AI template and instance

In [1]:
import os.path
import shutil

from superai.meta_ai.ai import AITemplate, AI, TrainingOrchestrator
from superai.meta_ai.parameters import Config
from superai.meta_ai.schema import Schema

In [2]:
# Start from a clean state: remove the local save directory left behind by a
# previous run of this notebook, if there is one.
AI_SAVE_DIR = ".AISave"

if os.path.exists(AI_SAVE_DIR):
    shutil.rmtree(AI_SAVE_DIR)

In [3]:
# Template definition: identity first, then the (empty) schemas and
# configuration, then the resource paths and Python requirements.
template = AITemplate(
    name="MnistTrainingTemplate",
    description="Template of Sample MNIST training",
    model_class="MnistModel",
    input_schema=Schema(),
    output_schema=Schema(),
    configuration=Config(),
    code_path=["resources/runDir"],
    artifacts={"run": "resources/runDir/run_this.sh"},
    requirements=["tensorflow-gpu", "polyaxon"],
)

# AI instance built from the template; its input/output parameters are taken
# from the template's own schemas.
ai = AI(
    name="mnist_training",
    version=1,
    description="AI instance of sample MNIST training",
    ai_template=template,
    input_params=template.input_schema.parameters(),
    output_params=template.output_schema.parameters(),
)

## Create and push training container
Use the following interface to create and push the training container.

> Note: In a future release, this interface will also connect to meta-ai to create the training job.

In [4]:
# Set the instance id by hand (lower-cased). The same lower-cased id shows up
# in the container image path printed from polyaxonfile.yaml further down.
ai._id = "DAAD6583-1FD7-4718-8B12-333701894FDB".lower()

# Create and push the training container: build is not skipped, CUDA is
# enabled, and the AWS EKS orchestrator is selected.
ai.training_deploy(
    orchestrator=TrainingOrchestrator.AWS_EKS,
    skip_build=False,
    enable_cuda=True,
)

Output()

# Starting training manually
To start training manually, run `polyaxon port-forward` in a separate terminal to connect to the Polyaxon API.

Then run the following cells.
> Note: In the future, training will be triggered from meta-ai.

In [5]:
# Print the Polyaxon spec file that the `polyaxon run` cell below submits.
!cat polyaxonfile.yaml

version: 1.1
kind: component
tags: [examples, keras]

inputs:
- {name: conv1_size, type: int, value: 32, isOptional: true}
- {name: conv2_size, type: int, value: 64, isOptional: true}
- {name: dropout, type: float, value: 0.8, isOptional: true}
- {name: hidden1_size, type: int, value: 500, isOptional: true}
- {name: optimizer, type: str, value: adam, isOptional: true}
- {name: log_learning_rate, type: int, value: -3, isOptional: true}
- {name: epochs, type: int, value: 10, isOptional: true}

run:
  kind: job
  container:
    image: 185169359328.dkr.ecr.us-east-1.amazonaws.com/models/dev/daad6583-1fd7-4718-8b12-333701894fdb/mnist_training:1
#    workingDir: "{{ globals.artifacts_path }}/polyaxon-examples/in_cluster/keras/mnist"
    command: ["/opt/conda/envs/env/bin/superai", "ai", "train"]
    imagePullPolicy: Always
    args: ["-p", "/home/model-server/",
           "-tp","/tmp",
           "-mp","/tmp",
           "-m","conv1_size={{ conv1_size }}",
          

In [None]:
# Submit polyaxonfile.yaml as a new Polyaxon run. Needs the `polyaxon port-forward`
# session from the instructions above to be active in another terminal.
# NOTE(review): -u / -l presumably upload local artifacts and stream the run logs,
# which matches the saved output below -- confirm against `polyaxon run --help`.
!polyaxon run -f polyaxonfile.yaml -u -l

Creating a new run...
[32mA new run `aa3d02416d554149b3620083348ddd32` was created[0m
You can view this run on Polyaxon UI: http://localhost:8000/ui/default/dev/runs/aa3d02416d554149b3620083348ddd32/
[32mArtifacts uploaded[0m
[32mRun is approved[0m
[32mStarting logs for run: <Name: None> - <uuid: aa3d02416d554149b3620083348ddd32>[0m
[33mrunning[0m
[37m2022-02-18 12:34:38.935113+01:00[0m | Reading configs from /opt/conda/envs/env/lib/python3.7/site-packages/superai/settings.yaml
[37m2022-02-18 12:34:38.935136+01:00[0m | Available envs:
[37m2022-02-18 12:34:38.935139+01:00[0m | - local
[37m2022-02-18 12:34:38.935142+01:00[0m | - dev
[37m2022-02-18 12:34:38.935145+01:00[0m | - sandbox
[37m2022-02-18 12:34:38.935148+01:00[0m | - stg
[37m2022-02-18 12:34:38.935151+01:00[0m | - prod
[37m2022-02-18 12:34:38.935153+01:00[0m | Reading configs from /opt/conda/envs/env/lib/python3.7/site-packages/superai/settings.yaml
[37m2022-02-18 12:34:38.935155+01:00[0m | [02/18/22