In [None]:
# Switch the kernel's working directory so that the relative paths used by the
# cells below (config/, src/, data/, experiments/, models/) resolve.
# NOTE(review): this absolute path is machine-specific — adjust it if the
# project is checked out somewhere else.
%cd /home/dvc-2-iris-demo

# Init DVC repository

In [None]:
# Do the experiment work on a dedicated, newly created git branch
!git checkout -b experiments

# Initialise DVC in this repository, then register the local /tmp directory
# as the default DVC remote storage under the name "myremote"
!dvc init
!dvc remote add --default myremote /tmp

# Overview of the main config

In [None]:
import yaml

# Parse the main pipeline config. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open until it is
# garbage-collected.
with open('config/pipeline_config.yml') as config_file:
    config = yaml.safe_load(config_file)

# Bare last expression so the notebook displays the parsed config.
config

# Create and run pipelines

## 1. Setup / config

In [None]:
# Stage "prepare_configs": runs prepare_configs.py against the main pipeline
# config; the four per-stage config files under experiments/ are its declared
# outputs.
!dvc run --name prepare_configs \
    --deps src/pipelines/prepare_configs.py \
    --deps config/pipeline_config.yml \
    --outs experiments/split_train_test_config.yml \
    --outs experiments/featurize_config.yml \
    --outs experiments/train_config.yml \
    --outs experiments/evaluate_config.yml \
    python src/pipelines/prepare_configs.py \
        --config=config/pipeline_config.yml

## 2. Featurization

In [None]:
# Stage "featurize": runs featurize.py with its generated config; the raw iris
# CSV is a declared dependency and the featured CSV is the declared output.
!dvc run --name featurize \
    --deps src/pipelines/featurize.py \
    --deps experiments/featurize_config.yml \
    --deps data/raw/iris.csv \
    --outs data/interim/featured_iris.csv \
    python src/pipelines/featurize.py \
        --config=experiments/featurize_config.yml

## 3. Split dataset into train/test

In [None]:
# Stage "split_train_test": runs split_train_test.py on the featured CSV; the
# train and test CSVs are the declared outputs.
# config/pipeline_config.yml is passed to the script via --base_config, so it
# is declared as a dependency too; otherwise DVC would not re-run this stage
# when only that file changes.
!dvc run --name split_train_test \
    --deps src/pipelines/split_train_test.py \
    --deps experiments/split_train_test_config.yml \
    --deps config/pipeline_config.yml \
    --deps data/interim/featured_iris.csv \
    --outs data/processed/train_iris.csv \
    --outs data/processed/test_iris.csv \
    python src/pipelines/split_train_test.py \
        --config=experiments/split_train_test_config.yml \
        --base_config=config/pipeline_config.yml

## 4. Train

In [None]:
# Stage "train": runs train.py with the train CSV as a dependency; the model
# file models/model.joblib is the declared output.
# config/pipeline_config.yml is passed to the script via --base_config, so it
# is declared as a dependency too; otherwise DVC would not re-run this stage
# when only that file changes.
!dvc run --name train \
    --deps src/pipelines/train.py \
    --deps experiments/train_config.yml \
    --deps config/pipeline_config.yml \
    --deps data/processed/train_iris.csv \
    --outs models/model.joblib \
    python src/pipelines/train.py \
        --config=experiments/train_config.yml \
        --base_config=config/pipeline_config.yml

## 5. Evaluate

In [None]:
# Stage "evaluate": runs evaluate.py with the trained model as a dependency;
# experiments/eval.txt is registered as a DVC metrics file.
# config/pipeline_config.yml is passed to the script via --base_config, so it
# is declared as a dependency too; otherwise DVC would not re-run this stage
# when only that file changes.
# NOTE(review): evaluate.py presumably also reads data/processed/test_iris.csv;
# if it does, declare that file as a dependency as well — confirm against the
# script.
!dvc run --name evaluate \
    --deps src/pipelines/evaluate.py \
    --deps experiments/evaluate_config.yml \
    --deps config/pipeline_config.yml \
    --deps models/model.joblib \
    --metrics experiments/eval.txt \
    python src/pipelines/evaluate.py \
        --config=experiments/evaluate_config.yml \
        --base_config=config/pipeline_config.yml

# Reproduce pipeline

## Evaluate (final stage)

In [None]:
# Every stage was just executed by the cells above, so DVC is expected to
# report that the pipeline is up to date and that there is nothing to reproduce.
!dvc repro

In [None]:
# Force the pipeline to be reproduced even though no changes were detected
!dvc repro --force