In [46]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
# import logging
import tempfile
import os
import shutil

from builtins import int
from mlflow import pyfunc
from tensorflow.python.saved_model import tag_constants
from time import time

class DictX(dict):
    """A ``dict`` whose items can also be read, set and deleted as attributes.

    Looking up or deleting a name that is not a key raises
    ``AttributeError`` built from the underlying ``KeyError``.
    """

    def __getattr__(self, key):
        # Only reached when regular attribute lookup fails, so dict methods
        # keep working; anything else is resolved against the stored items.
        try:
            value = self[key]
        except KeyError as missing:
            raise AttributeError(missing)
        return value

    def __setattr__(self, key, value):
        # Attribute assignment is stored as a dictionary item.
        self[key] = value

    def __delattr__(self, key):
        # Attribute deletion removes the matching dictionary item.
        try:
            del self[key]
        except KeyError as missing:
            raise AttributeError(missing)

    def __repr__(self):
        return '<DictX {}>'.format(dict.__repr__(self))

def _mlflow_log_metrics(metrics, metric_name):
    """Record metric value during each epoch using the step parameter in
    mlflow.log_metric.

    :param metrics:
    :param metric_name:
    :return:
    """
    for epoch, metric in enumerate(metrics[metric_name], 1): mlflow.log_metric(
        metric_name, metric,
        step=epoch)

In [2]:
import os
import sys
# Make the local ``trainer`` directory importable so the project modules
# (model, utils, model_deployment) can be imported by the next cell.
# The membership check keeps the cell idempotent: re-running it does not
# stack duplicate entries on sys.path.
src_dir = os.path.join(os.getcwd(), 'trainer')
if src_dir not in sys.path:
    sys.path.append(src_dir)

  and should_run_async(code)


In [24]:
import model
import utils
import model_deployment

In [4]:
import mlflow
import mlflow.tensorflow
import tensorflow as tf

In [12]:
class Log:
    """Minimal logger stand-in whose ``info`` simply prints."""

    def info(self, text):
        """Write ``text`` to standard output."""
        print(text)


# Instance the following cells call as ``logging.info(...)``.
logging = Log()

In [43]:
# Run configuration, readable both as items and as attributes
# (for example ``args.job_dir``).
args = DictX(
    reuse_job_dir=False,
    job_dir='mlflow',
    train_files='gs://cloud-samples-data/ml-engine/census/data/adult.data.csv',
    eval_files='gs://cloud-samples-data/ml-engine/census/data/adult.test.csv',
    learning_rate=.01,
    num_epochs=5,
    batch_size=128,
    eval_steps=1,
)

In [18]:
# Either keep an existing job_dir or remove it so the run starts clean.
if args.reuse_job_dir:
    logging.info('Reusing job_dir {} if it exists'.format(args.job_dir))
else:
    # Debug trace: a marker line, then whether job_dir currently exists.
    print('paso')
    print(tf.io.gfile.exists(args.job_dir))
    if tf.io.gfile.exists(args.job_dir):
        tf.io.gfile.rmtree(args.job_dir)
        logging.info(
            'Deleted job_dir {} to avoid re-use'.format(args.job_dir))

paso
False


In [16]:
# Emit the "reusing job_dir" message on its own, with the same text as the
# reuse branch of the job_dir check.
logging.info('Reusing job_dir {} if it exists'.format(args.job_dir))

Reusing job_dir mlflow if it exists


In [19]:
# Load the training and evaluation splits from the configured file locations.
train_x, train_y, eval_x, eval_y = utils.load_data(
    args.train_files,
    args.eval_files,
)
# Row count and feature count of the training set; row count of the eval set.
num_train_examples, input_dim = train_x.shape
num_eval_examples = eval_x.shape[0]

Location train file: gs://cloud-samples-data/ml-engine/census/data/adult.data.csv, eval file gs://cloud-samples-data/ml-engine/census/data/adult.test.csv


In [66]:
# Build the Keras model for the loaded feature width and the configured
# learning rate.
keras_model = model.create_keras_model(
    input_dim=input_dim,
    learning_rate=args.learning_rate,
)

In [59]:
# Training input: features are handed over as a numpy array
# (DataFrame.values), shuffled, in batches of args.batch_size.
training_dataset = model.input_fn(
    features=train_x.values,
    labels=train_y,
    batch_size=args.batch_size,
    num_epochs=args.num_epochs,
    shuffle=True,
)

# Validation input: same conversion, no shuffling, and the whole evaluation
# set delivered as a single batch.
validation_dataset = model.input_fn(
    features=eval_x.values,
    labels=eval_y,
    batch_size=num_eval_examples,
    num_epochs=args.num_epochs,
    shuffle=False,
)

In [139]:
# Start an MLflow run (or pick up the one that is already active) and keep it
# active for the cells that follow.
#
# A ``with mlflow.start_run()`` block ends the run as soon as the block exits.
# Any later mlflow.log_param / mlflow.log_metric call would then open a new,
# implicit run, so the logged data would not belong to ``run_id``, which is
# also used to build the TensorBoard and model paths.
#
# Reusing mlflow.active_run() keeps the cell safe to re-run. Call
# mlflow.end_run() once everything has been logged.
active_run = mlflow.active_run() or mlflow.start_run()
run_id = active_run.info.run_id

In [140]:
# Display the id of the MLflow run captured in ``run_id``.
# mlflow.end_run()
run_id

'976a46c4c2d0445cb50d330c801a00b8'

In [62]:
# Callbacks
class MlflowCallback(tf.keras.callbacks.Callback):
    """Keras callback that records model facts as MLflow params after training."""

    def on_train_end(self, logs=None):
        """Log the layer count and the optimizer class name of ``self.model``.

        :param logs: accepted for the Keras callback signature; not used here.
        """
        trained_model = self.model
        optimizer_name = type(trained_model.optimizer).__name__
        mlflow.log_param('num_layers', len(trained_model.layers))
        mlflow.log_param('optimizer_name', optimizer_name)
# TensorBoard logs go to <job_dir>/<run_id>/tensorboard, with histograms
# computed every epoch.
tensorboard_path = os.path.join(args.job_dir, run_id, 'tensorboard')
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    tensorboard_path, histogram_freq=1)

# Learning-rate schedule: the base rate plus a term that halves every epoch
# (0.02 * 0.5 ** (epoch + 1)).
lr_decay_callback = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: args.learning_rate + 0.02 * (0.5 ** (1 + epoch)),
    verbose=False,
)

# Logs model facts to MLflow when training ends.
mlflow_callback = MlflowCallback()

In [80]:
# Build a fresh model right before fitting (same call as the earlier
# model-creation cell).
keras_model = model.create_keras_model(
    input_dim=input_dim,
    learning_rate=args.learning_rate,
)

In [81]:
# Fit the model on the training dataset with the three callbacks attached.
# steps_per_epoch is the number of whole batches in the training set.
history = keras_model.fit(
    training_dataset,
    epochs=args.num_epochs,
    steps_per_epoch=int(num_train_examples / args.batch_size),
    validation_data=validation_dataset,
    validation_steps=args.eval_steps,
    callbacks=[lr_decay_callback, tensorboard_callback, mlflow_callback],
    verbose=1,
)
# Per-epoch value lists keyed by metric name.
metrics = history.history

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [45]:
# Display the per-epoch history collected from fit().
metrics

{'loss': [0.5243113040924072,
  0.37130650877952576,
  0.3430071175098419,
  0.3362666368484497,
  0.33429110050201416],
 'accuracy': [0.7890625,
  0.8304625749588013,
  0.8407357335090637,
  0.8441498279571533,
  0.8464567065238953],
 'val_loss': [0.39031627774238586,
  0.355435311794281,
  0.33384740352630615,
  0.33359837532043457,
  0.3242957293987274],
 'val_accuracy': [0.8184052109718323,
  0.8338862061500549,
  0.8491215109825134,
  0.8453741073608398,
  0.850411593914032],
 'lr': [0.02, 0.015, 0.0125, 0.01125, 0.010625]}

In [47]:
keras_model.summary()

# Run parameters, logged one at a time in the order listed.
run_params = [
    ('train_files', args.train_files),
    ('eval_files', args.eval_files),
    ('num_epochs', args.num_epochs),
    ('batch_size', args.batch_size),
    ('learning_rate', args.learning_rate),
    ('train_samples', num_train_examples),
    ('eval_samples', num_eval_examples),
    ('eval_steps', args.eval_steps),
    ('steps_per_epoch', int(num_train_examples / args.batch_size)),
]
for param_name, param_value in run_params:
    mlflow.log_param(param_name, param_value)

# Add metrics: every series of the training history, one value per epoch.
for series_name in ('loss', 'accuracy', 'val_loss', 'val_accuracy', 'lr'):
    _mlflow_log_metrics(metrics, series_name)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               1200      
_________________________________________________________________
dense_1 (Dense)              (None, 75)                7575      
_________________________________________________________________
dense_2 (Dense)              (None, 50)                3800      
_________________________________________________________________
dense_3 (Dense)              (None, 25)                1275      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 26        
Total params: 13,876
Trainable params: 13,876
Non-trainable params: 0
_________________________________________________________________


In [99]:
# Directory for the exported model: <job_dir>/<run_id>/model.
model_local_path = os.path.join(args.job_dir, run_id, 'model')
# model_local_path = 'model3'

In [95]:
import numpy as np
# Scratch check: dump a tiny array at model_local_path.
# NOTE(review): looks like a probe of whether that location is writable -
# confirm, and drop the cell if it is no longer needed.
a = [[1, 2, 3, 4]]
np.save(model_local_path, a)

In [101]:
from pathlib import Path
import numpy
import pandas as pd

# Base directory for the Path-based model location; Path() with no arguments
# is the relative path '.'.
data_dir_raw = Path()

In [141]:
# The <job_dir>/<run_id>/model location again, this time as a pathlib object.
model_local_path = data_dir_raw.joinpath(args.job_dir, run_id, 'model')

In [142]:
# Display the current value of model_local_path.
model_local_path

WindowsPath('mlflow/976a46c4c2d0445cb50d330c801a00b8/model')

In [146]:
# Shorter export location: <job_dir>/<first 11 characters of run_id>/model.
# It is derived from the current run instead of a pasted literal, so it stays
# valid when run_id changes. For the run id '976a46c4c2d0445cb50d330c801a00b8'
# it yields the same 'mlflow/976a46c4c2d/model' string as the literal did.
# NOTE(review): the run id is presumably truncated to keep the path short on
# Windows - confirm.
model_local_path = '{}/{}/model'.format(args.job_dir, run_id[:11])

In [147]:
# Export the trained Keras model, optimizer included, to model_local_path,
# overwriting an existing export at that location.
tf.keras.models.save_model(
    model=keras_model,
    filepath=model_local_path,
    overwrite=True, include_optimizer=True,
    save_format=None, signatures=None, options=None)

INFO:tensorflow:Assets written to: mlflow/976a46c4c2d/model\assets


In [48]:
# Export the model in SavedModel format to <job_dir>/<run_id>/model and
# report where it was written.
# NOTE(review): the output recorded under this cell is a NotFoundError raised
# by the save on a Windows path. The cause is not established here; the cell
# that saves to the shorter 'mlflow/976a46c4c2d/model' path succeeded, so
# path length is one candidate - confirm.
model_local_path = os.path.join(args.job_dir, run_id, 'model')
tf.saved_model.save(obj=keras_model, export_dir=model_local_path)
logging.info('Model exported to: {}'.format(model_local_path))

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


NotFoundError: Failed to create a NewWriteableFile: mlflow\7e1b6daeea0e4d988ad3f0c2595c3aaa\model\variables\variables_temp_4a16e86dff25474b962377846c108e7e/part-00000-of-00001.data-00000-of-00001.tempstate9155103110247276991 : El sistema no puede encontrar la ruta especificada.
; No such process [Op:SaveV2]