# valohai.yaml

---

# Step: runs the repository's buildUberJar.sh script (per the step name, this
# builds the CPU and GPU uberjars).
- step:
    name: build-cpu-gpu-uberjar
    image: neomatrix369/dl4j-nlp-cuda:v0.5
    inputs:
      # Resources archive downloaded from the project's GitHub release page.
      - name: src-main-resources
        default: >-
          https://github.com/neomatrix369/dl4j-nlp-cuda-example/releases/download/dl4j-nlp-src-main-resources-v0.1/dl4j-nlp-src-main-resources.tgz
        description: NLP data for training, prediction, evaluation
    command:
      - ./buildUberJar.sh
    environment: aws-eu-west-1-g2-2xlarge

# Step: invokes runUberJar.sh with the "train" action and BACKEND=cpu.
- step:
    name: train-cpu-linux
    image: neomatrix369/dl4j-nlp-cuda:v0.5
    inputs:
      # NOTE(review): "datum://" carries no datum identifier; presumably the
      # real datum URL is supplied when the execution is created - confirm.
      - name: cpu-linux-uberjar
        default: datum://
        description: dl4j nlp cpu linux uberjar
      - name: imdb-reviews
        default: >-
          http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
        description: IMDB Review dataset for sentiment analysis
      - name: google-word2vec
        default: >-
          https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz
        description: word2vec pre-trained Google News corpus
    command:
      - cd ${VH_REPOSITORY_DIR}
      # BACKEND is not referenced by the commands below; presumably
      # runUberJar.sh reads it from the environment - verify in the script.
      - export BACKEND=cpu
      - export ACTION=train
      # "time" reports how long the run takes; "--output-model-dir ." points
      # at the current directory, i.e. VH_REPOSITORY_DIR after the cd above.
      - time ./runUberJar.sh --action ${ACTION} --output-model-dir .
    environment: aws-eu-west-1-g3-4xlarge

# Step: invokes runUberJar.sh with the "train" action and BACKEND=gpu.
- step:
    name: train-gpu-linux
    image: neomatrix369/dl4j-nlp-cuda:v0.5
    inputs:
      # NOTE(review): "datum://" carries no datum identifier; presumably the
      # real datum URL is supplied when the execution is created - confirm.
      - name: gpu-linux-uberjar
        default: datum://
        description: dl4j nlp gpu linux uberjar
      - name: imdb-reviews
        default: >-
          http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
        description: IMDB Review dataset for sentiment analysis
      - name: google-word2vec
        default: >-
          https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz
        description: word2vec pre-trained Google News corpus
    command:
      - cd ${VH_REPOSITORY_DIR}
      # BACKEND is not referenced by the commands below; presumably
      # runUberJar.sh reads it from the environment - verify in the script.
      - export BACKEND=gpu
      - export ACTION=train
      # "time" reports how long the run takes; "--output-model-dir ." points
      # at the current directory, i.e. VH_REPOSITORY_DIR after the cd above.
      - time ./runUberJar.sh --action ${ACTION} --output-model-dir .
    environment: aws-eu-west-1-g3-4xlarge

# Step: invokes runUberJar.sh with the "evaluate" action against a model file
# supplied through the "model" input.
- step:
    name: evaluate-model-linux
    image: neomatrix369/dl4j-nlp-cuda:v0.5
    inputs:
      # NOTE(review): both "datum://" defaults carry no datum identifier;
      # presumably the real datum URLs are supplied when the execution is
      # created - confirm.
      - name: linux-uberjar
        default: datum://
        description: dl4j nlp linux uberjar
      - name: model
        default: datum://
        description: nlp model trained on Google news corpus
      - name: imdb-reviews
        default: >-
          http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
        description: IMDB Review dataset for sentiment analysis
      - name: google-word2vec
        default: >-
          https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz
        description: word2vec pre-trained Google News corpus
    command:
      - cd ${VH_REPOSITORY_DIR}
      - export ACTION=evaluate
      - echo "~~~ Copying jar and model into ${VH_REPOSITORY_DIR}"
      # Stage the uberjar and the model next to runUberJar.sh; "." is
      # VH_REPOSITORY_DIR because of the cd above.
      - cp ${VH_INPUTS_DIR}/linux-uberjar/*.jar ${VH_REPOSITORY_DIR}
      - cp ${VH_INPUTS_DIR}/model/*.pb .
      # $(ls *.pb) expands to every .pb file in the current directory, so
      # this line assumes exactly one model file is present.
      - time ./runUberJar.sh --action ${ACTION} --input-model-file $(ls *.pb)
    environment: aws-eu-west-1-g3-4xlarge

###
### aws-eu-west-1-g2-2xlarge
###     GPU Specification | 1x GPU - NVIDIA GRID K520 (4GB)
###           Description | 8 cores, 15GB memory, 500GB storage
###         Per-Hour USD$ | 0.90700
### aws-eu-west-1-g3-4xlarge
###     GPU Specification | 1x GPU - NVIDIA Tesla M60 (8GB)
###           Description | 16 cores, 122GB memory, 500GB storage
###         Per-Hour USD$ | 1.21000
### aws-eu-west-1-g2-8xlarge
###     GPU Specification | 4x GPU - NVIDIA GRID K520 (16GB)
###           Description | 32 cores, 60GB memory, 500GB storage
###         Per-Hour USD$ | 2.80800
###
# Step: runs know-your-gpus.sh, stores its combined stdout/stderr as an output
# file, then prints that file to the execution log.
- step:
    name: know-your-gpus
    image: neomatrix369/dl4j-nlp-cuda:v0.5
    command:
      - cd ${VH_REPOSITORY_DIR}
      # Portable "> file 2>&1" instead of the bash-only "&>": under a plain
      # POSIX sh, "&>" would background the script and truncate the log.
      - ./know-your-gpus.sh > "${VH_OUTPUTS_DIR}/know-your-gpus.logs" 2>&1
      - cat "${VH_OUTPUTS_DIR}/know-your-gpus.logs"
    environment: aws-eu-west-1-g3-4xlarge