# Preparation

In [1]:
# Switch to the project root; the cells below use paths relative to it.
# NOTE(review): this absolute path is environment-specific -- adjust it if the
# project is checked out elsewhere.
%cd /home/mlflow-1-tracking/

/home/mlflow-1-tracking


In [2]:
import yaml

# Load the full pipeline config so it can be inspected below.
# The `with` block closes the file handle as soon as parsing is done instead of
# leaving it open for the lifetime of the kernel.
# NOTE(review): FullLoader can build more than plain data types; consider
# yaml.safe_load if this file could ever come from an untrusted source.
with open('config/pipeline_config.yml') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

config

{'base': {'project': '7labs/mlflow-1-tracking',
  'name': 'iris',
  'tags': ['solution-0-prototype', 'dev'],
  'model': {'model_name': 'model.joblib', 'models_folder': 'models'},
  'experiments': {'experiments_folder': 'experiments'},
  'random_state': 42},
 'split_train_test': {'folder': 'experiments',
  'train_csv': 'data/processed/train_iris.csv',
  'test_csv': 'data/processed/test_iris.csv',
  'test_size': 0.2},
 'featurize': {'dataset_csv': 'data/raw/iris.csv',
  'featured_dataset_csv': 'data/interim/featured_iris.csv',
  'features_columns_range': ['sepal_length', 'petal_length_to_petal_width'],
  'target_column': 'species'},
 'train': {'cv': 5,
  'estimator_name': 'SVC',
  'estimators': {'LogisticRegression': {'param_grid': {'C': [0.001, 0.01],
     'max_iter': [100],
     'solver': ['lbfgs'],
     'multi_class': ['multinomial']}},
   'SVC': {'param_grid': {'C': [0.1, 1.0],
     'kernel': ['rbf', 'linear'],
     'gamma': ['scale'],
     'degree': [3, 5]}}}},
 'evaluate': {'metric

# Prepare configs

In [3]:
# Generate the per-stage config files from the single pipeline config;
# the script reports each file it saves under experiments/.
!python src/pipelines/prepare_configs.py \
    --config=config/pipeline_config.yml

Save config: experiments/base_config.yml
Save config: experiments/split_train_test_config.yml
Save config: experiments/featurize_config.yml
Save config: experiments/train_config.yml
Save config: experiments/evaluate_config.yml


## Browse folder with configs

In [4]:
# list the config files written by prepare_configs.py
!ls experiments/

base_config.yml      featurize_config.yml	  train_config.yml
evaluate_config.yml  split_train_test_config.yml


# Extract features

In [5]:
# featurization config
!cat experiments/featurize_config.yml

dataset_csv: data/raw/iris.csv
featured_dataset_csv: data/interim/featured_iris.csv
features_columns_range:
- sepal_length
- petal_length_to_petal_width
target_column: species


In [6]:
# Run the featurization stage with its generated config.
# The config names data/raw/iris.csv as dataset_csv and
# data/interim/featured_iris.csv as featured_dataset_csv.
!python src/pipelines/featurize.py \
    --config=experiments/featurize_config.yml

In [7]:
# iris dataset with new features is created
!ls data/interim

featured_iris.csv


# Split train/test dataset

In [8]:
# split config
!cat experiments/split_train_test_config.yml

folder: experiments
test_csv: data/processed/test_iris.csv
test_size: 0.2
train_csv: data/processed/train_iris.csv


In [9]:
# Run the train/test split stage with its generated config.
# NOTE(review): --base_config points at the full pipeline config although
# prepare_configs.py also wrote experiments/base_config.yml -- confirm which
# file the script expects.
!python src/pipelines/split_train_test.py \
    --config=experiments/split_train_test_config.yml \
    --base_config=config/pipeline_config.yml
    

In [10]:
# train and test datasets are created
!ls data/processed/

test_iris.csv  train_iris.csv


# Train model

In [11]:
# train config
!cat experiments/train_config.yml

cv: 5
estimator_name: SVC
estimators:
  LogisticRegression:
    param_grid:
      C:
      - 0.001
      - 0.01
      max_iter:
      - 100
      multi_class:
      - multinomial
      solver:
      - lbfgs
  SVC:
    param_grid:
      C:
      - 0.1
      - 1.0
      degree:
      - 3
      - 5
      gamma:
      - scale
      kernel:
      - rbf
      - linear


In [12]:
# Run the training stage for the estimator selected by estimator_name in
# experiments/train_config.yml (a cv-fold search over that estimator's param_grid,
# as the "Fitting ... folds" log line below shows).
!python src/pipelines/train.py \
    --config=experiments/train_config.yml \
    --base_config=config/pipeline_config.yml

Fitting 5 folds for each of 8 candidates, totalling 40 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.1s finished
0.957314307639385


In [13]:
# model is created
!ls models/

model.joblib


# Evaluate model 

In [14]:
# evaluate config
!cat experiments/evaluate_config.yml

metrics_file: eval.txt


In [17]:
# Run the evaluation stage; it prints the metrics and the MLflow experiment/run
# details, and the metrics file named in the config (eval.txt) appears under
# experiments/ afterwards.
!python src/pipelines/evaluate.py \
    --config=experiments/evaluate_config.yml \
    --base_config=config/pipeline_config.yml

{'f1_score': 1.0, 'confusion_matrix': [[10, 0, 0], [0, 9, 0], [0, 0, 11]]}
[<Experiment: artifact_location='file:///home/mlflow-1-tracking/mlruns/0', experiment_id='0', lifecycle_stage='active', name='LogisticRegression'>, <Experiment: artifact_location='file:///home/mlflow-1-tracking/mlruns/1', experiment_id='1', lifecycle_stage='active', name='SVC'>]
<ActiveRun: >
<RunInfo: artifact_uri='file:///home/mlflow-1-tracking/mlruns/1/ee9cea377619417fb0f5f45f6ce3c067/artifacts', end_time=None, experiment_id='1', lifecycle_stage='active', run_id='ee9cea377619417fb0f5f45f6ce3c067', run_uuid='ee9cea377619417fb0f5f45f6ce3c067', start_time=1561125885644, status='RUNNING', user_id='user'>
ee9cea377619417fb0f5f45f6ce3c067


In [18]:
# metrics file eval.txt is created
!ls experiments

base_config.yml  evaluate_config.yml   split_train_test_config.yml
eval.txt	 featurize_config.yml  train_config.yml


In [19]:
# show the raw metrics file written by the evaluate stage
!cat experiments/eval.txt

{
  "f1_score": 1.0,
  "confusion_matrix": [
    [
      10,
      0,
      0
    ],
    [
      0,
      9,
      0
    ],
    [
      0,
      0,
      11
    ]
  ]
}

In [20]:
# Parse the metrics file back into a Python dict. eval.txt contains JSON, which
# the YAML loader reads as well. The `with` block closes the file handle once
# parsing is done instead of leaking it.
with open('experiments/eval.txt') as report_file:
    evaluate_report = yaml.load(report_file, Loader=yaml.FullLoader)

evaluate_report

{'f1_score': 1.0, 'confusion_matrix': [[10, 0, 0], [0, 9, 0], [0, 0, 11]]}

# Train with another estimator

#### 1. Open config/pipeline_config.yml
#### 2. In section __train__ change _estimator_name_ from SVC to LogisticRegression
#### 3. Rerun stages __Train__ and __Evaluate__
#### 4. Go to next section __Enter MLflow ui__


# Enter MLflow ui
## http://0.0.0.0:5000