# Getting started with deep learning in Databricks: an end-to-end example using TensorFlow Keras, Hyperopt, and MLflow

This tutorial uses a small dataset to show how to use TensorFlow Keras, Hyperopt, and MLflow to develop a deep learning model in Databricks. 

It includes the following steps:
- Load and preprocess data
- Part 1. Create a neural network model with TensorFlow Keras and view training with inline TensorBoard
- Part 2. Perform automated hyperparameter tuning with Hyperopt and MLflow and use autologging to save results
- Part 3. Use the best set of hyperparameters to build a final model 
- Part 4. Register the model in MLflow and use the model to make predictions

### Setup
- Databricks Runtime for Machine Learning 7.0 or above. This notebook uses TensorBoard to display the results of neural network training. Depending on the version of Databricks Runtime you are using, you use different methods to start TensorBoard.

In [23]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import load_img, img_to_array

import mlflow
import mlflow.keras
import mlflow.tensorflow

from math import sqrt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os, PIL, time, cv2, json, shutil

import evalml
from evalml.preprocessing import load_data
from evalml.preprocessing import drop_nan_target_rows

from text_to_image.utilities import check_filename
from text_to_image.utilities import convert_char_to_int
from text_to_image.utilities import get_image_size




## Load and preprocess data
This example uses the California Housing dataset from `scikit-learn`.

In [24]:
y_train = pd.read_csv('../data/processed/y_train.csv')
X_train = pd.read_csv('../data/processed/X_train.csv')

y_test = pd.read_csv('../data/processed/y_test.csv')
X_test = pd.read_csv('../data/processed/X_test.csv')

X_train

Unnamed: 0,index,Beds,Baths,Area,Noise,PropertyType,DaysOnRealtor.com,YearBuilt,Latitude,Longitude,City_x,County,FemaInfo,FloodFactorInfo,LastSoldYear,SizeRank
0,622,3.0,2.5,1640.0,Medium,Townhome,104.0,1973.0,25.937100,-80.133733,North Miami Beach,Miami-Dade County,AE (),flood_factor_high,2009.0,79
1,811,3.0,3.5,1862.0,Medium,Condo,1021.0,2004.0,25.842794,-80.122788,Miami Beach,Miami-Dade County,AE (),flood_factor_high,2010.0,636
2,734,4.0,3.0,1932.0,Medium,Single Family Home,51.0,1987.0,28.044072,-82.401205,Tampa,Hillsborough County,X (),flood_factor_low,2004.0,1023
3,387,3.0,2.0,1750.0,Medium,Townhome,33.0,2005.0,27.752627,-82.414638,Apollo Beach,Hillsborough County,AE (),flood_factor_high,2012.0,5376
4,364,3.0,2.0,1732.0,Low,Single Family Home,17.0,1985.0,26.716497,-80.216349,Royal Palm Beach,Palm Beach County,X (),flood_factor_low,2017.0,90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,840,2.0,2.0,1060.0,Medium,Condo,8.0,1979.0,26.204291,-80.272675,Fort Lauderdale,Broward County,X500 (),flood_factor_low,2004.0,401
746,102,4.0,3.0,2421.0,Medium,Single Family Home,29.0,1986.0,28.053214,-82.380432,Temple Terrace,Hillsborough County,X (),flood_factor_low,2019.0,1023
747,827,4.0,3.5,3392.0,Medium,Condo,41.0,2020.0,26.422176,-81.905178,Fort Myers Beach,Lee County,VE (),flood_factor_high,2017.0,5839
748,715,3.0,3.0,2137.0,Low,Condo,71.0,1992.0,26.439186,-81.920834,Fort Myers Beach,Lee County,AE (),flood_factor_high,2004.0,5839


In [25]:
from math import sqrt

def nearest_square(num):
    num1 = round(sqrt(num))**2
    return sqrt(num1)
        
nearest_square = nearest_square(len(X_train.columns))
nearest_square

4.0

In [26]:
missing_from_sq = nearest_square*nearest_square - len(X_train.columns)
missing_from_sq

0.0

In [27]:
add_missing_cols = ['feature_'+str(x) for x in range(len(X_train.columns), int(nearest_square*nearest_square))]
for col in add_missing_cols:
    X_train[col] = [np.nan for x in range(len(X_train))]

for col in add_missing_cols:
    X_test[col] = [np.nan for x in range(len(X_test))]


In [28]:
%%time

row = X_train.sample(1)
paths = []
ct=0

def concat_tile(im_list_2d):
    return cv2.vconcat([cv2.hconcat(im_list_h) for im_list_h in im_list_2d])

def tint_image(src, color="#FFFFFF"):
    src.load()
    r, g, b = src.split()
    gray = ImageOps.grayscale(src)
    result = ImageOps.colorize(gray, (0, 0, 0, 0), color) 
    return result


class encoder():
    def __init__(self):
        self.ct = 0
        
    def run(self, text, image_path, limit=256):
        """
        Take a string of text and encode it into an 8-bit grayscale image.
        :param str text: Text may be ASCII or UTF-8 but limit value must be changed accordingly.
        :param str image_path: Path to image file. Should have a '.png' extension or no extension
        :param int limit: The value limit for each pixel. 256 = 8 bit meaning all character encoding schemes using 8 or
        fewer bit can be encoded. If limit is 65536 then character encoding schemes using 16 bits or less can be applied
        e.g. UTF-8. When a character is used from a character set greater than the limit, the character value will be
        divided by the limit value. e.g. limit=256 character=Ĭ (value=300), resulting value will be 44. For values equal to
        the limit, the resulting value will be 1 to avoid NULL within the encoded data. Limit is the number of possible
        values in decimal from 1 to a max value. (default=256 i.e. 8 bit pixels/ 1- 256 means 255 possible values)
        :return str:  The path to the image produced.
        """
        if type(text) is not str:
            raise TypeError("Parameter 'text' must be a string.")
        text_length = len(text)
        size = get_image_size(text_length)
        result_path = check_filename(image_path, extension=".png")
        
        img = PIL.Image.new("RGB", size)  # grayscale, blank black image
        ind = 0
        for row in range(0, size[0]):
            for col in range(0, size[1]):
                if ind < text_length:  # only change pixel value for length of text
                    pixel_value = convert_char_to_int(text[ind], limit=limit)
                    img.putpixel((row, col), (255-pixel_value, pixel_value, 255-pixel_value))
                    ind += 1
                else:  # end of text, leave remaining pixel(s) black to indicate null
                    break
                    
        newsize = (24, 24)     
        img = img.resize(newsize)
        return np.array(img) 
    
    

class build_image():
    
    def __init__(self, data_x, data_y, savepath):        
        self.ct = 0
        self.l = []
        self.paths=[]
        self.t = time.time()
        self.enc = encoder()
        self.data_x = data_x
        self.data_y = data_y
        self.savepath = savepath
    
    def run(self, indx):
        row = self.data_x.iloc[[indx]]
        price = self.data_y.iloc[indx]
        for col in row.columns:
            for val in row[col].astype(str).str.encode(encoding = 'utf8').values:
                im = self.enc.run(str(val).replace("b'","").replace("'",""), "image"+str(self.ct)+".png")
                self.paths.append(im)
                self.ct+=1
                if self.ct % nearest_square == 0:
                    self.l.append(self.paths)
                    self.paths=[]

        im_tile = concat_tile(self.l)
        im = PIL.Image.fromarray(im_tile)
        newsize = (299, 299) 
        im = im.resize(newsize) 
        im.save(self.savepath+"opencvconcattile_"+str(indx)+"_"+str(price)+"_.png")
        self.l=[]
        if self.ct % 10000 == 0:
            print(indx, time.time() - self.t)
        self.t = time.time()



CPU times: user 812 µs, sys: 620 µs, total: 1.43 ms
Wall time: 1.43 ms


In [29]:
X_train

Unnamed: 0,index,Beds,Baths,Area,Noise,PropertyType,DaysOnRealtor.com,YearBuilt,Latitude,Longitude,City_x,County,FemaInfo,FloodFactorInfo,LastSoldYear,SizeRank
0,622,3.0,2.5,1640.0,Medium,Townhome,104.0,1973.0,25.937100,-80.133733,North Miami Beach,Miami-Dade County,AE (),flood_factor_high,2009.0,79
1,811,3.0,3.5,1862.0,Medium,Condo,1021.0,2004.0,25.842794,-80.122788,Miami Beach,Miami-Dade County,AE (),flood_factor_high,2010.0,636
2,734,4.0,3.0,1932.0,Medium,Single Family Home,51.0,1987.0,28.044072,-82.401205,Tampa,Hillsborough County,X (),flood_factor_low,2004.0,1023
3,387,3.0,2.0,1750.0,Medium,Townhome,33.0,2005.0,27.752627,-82.414638,Apollo Beach,Hillsborough County,AE (),flood_factor_high,2012.0,5376
4,364,3.0,2.0,1732.0,Low,Single Family Home,17.0,1985.0,26.716497,-80.216349,Royal Palm Beach,Palm Beach County,X (),flood_factor_low,2017.0,90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,840,2.0,2.0,1060.0,Medium,Condo,8.0,1979.0,26.204291,-80.272675,Fort Lauderdale,Broward County,X500 (),flood_factor_low,2004.0,401
746,102,4.0,3.0,2421.0,Medium,Single Family Home,29.0,1986.0,28.053214,-82.380432,Temple Terrace,Hillsborough County,X (),flood_factor_low,2019.0,1023
747,827,4.0,3.5,3392.0,Medium,Condo,41.0,2020.0,26.422176,-81.905178,Fort Myers Beach,Lee County,VE (),flood_factor_high,2017.0,5839
748,715,3.0,3.0,2137.0,Low,Condo,71.0,1992.0,26.439186,-81.920834,Fort Myers Beach,Lee County,AE (),flood_factor_high,2004.0,5839


In [30]:
target = 'yearly_price_delta_percent'

data = X_train.drop('index',axis=1).reset_index().set_index('index')
data[target] = y_train.drop('index',axis=1).reset_index().set_index('index')
X_train = data.drop(target,axis=1)
y_train = data[target]
X_train

Unnamed: 0_level_0,Beds,Baths,Area,Noise,PropertyType,DaysOnRealtor.com,YearBuilt,Latitude,Longitude,City_x,County,FemaInfo,FloodFactorInfo,LastSoldYear,SizeRank
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,3.0,2.5,1640.0,Medium,Townhome,104.0,1973.0,25.937100,-80.133733,North Miami Beach,Miami-Dade County,AE (),flood_factor_high,2009.0,79
1,3.0,3.5,1862.0,Medium,Condo,1021.0,2004.0,25.842794,-80.122788,Miami Beach,Miami-Dade County,AE (),flood_factor_high,2010.0,636
2,4.0,3.0,1932.0,Medium,Single Family Home,51.0,1987.0,28.044072,-82.401205,Tampa,Hillsborough County,X (),flood_factor_low,2004.0,1023
3,3.0,2.0,1750.0,Medium,Townhome,33.0,2005.0,27.752627,-82.414638,Apollo Beach,Hillsborough County,AE (),flood_factor_high,2012.0,5376
4,3.0,2.0,1732.0,Low,Single Family Home,17.0,1985.0,26.716497,-80.216349,Royal Palm Beach,Palm Beach County,X (),flood_factor_low,2017.0,90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,2.0,2.0,1060.0,Medium,Condo,8.0,1979.0,26.204291,-80.272675,Fort Lauderdale,Broward County,X500 (),flood_factor_low,2004.0,401
746,4.0,3.0,2421.0,Medium,Single Family Home,29.0,1986.0,28.053214,-82.380432,Temple Terrace,Hillsborough County,X (),flood_factor_low,2019.0,1023
747,4.0,3.5,3392.0,Medium,Condo,41.0,2020.0,26.422176,-81.905178,Fort Myers Beach,Lee County,VE (),flood_factor_high,2017.0,5839
748,3.0,3.0,2137.0,Low,Condo,71.0,1992.0,26.439186,-81.920834,Fort Myers Beach,Lee County,AE (),flood_factor_high,2004.0,5839


In [31]:
data = X_test.drop('index',axis=1).reset_index().set_index('index')
data[target] = y_test.drop('index',axis=1).reset_index().set_index('index')
X_test = data.drop(target,axis=1)
y_test = data[target]
X_test

Unnamed: 0_level_0,Beds,Baths,Area,Noise,PropertyType,DaysOnRealtor.com,YearBuilt,Latitude,Longitude,City_x,County,FemaInfo,FloodFactorInfo,LastSoldYear,SizeRank
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,5.0,5.5,5123.0,Medium,Single Family Home,164.0,2005.0,26.143917,-80.303507,Plantation,Broward County,AH (),flood_factor_low,2016.0,6109
1,2.0,1.5,1075.0,Medium,Condo,172.0,1976.0,25.996491,-80.120727,Hallandale Beach,Broward County,AE (),flood_factor_high,2011.0,230
2,4.0,3.0,2790.0,Medium,Single Family Home,55.0,2014.0,27.780161,-82.380489,Apollo Beach,Hillsborough County,X (),flood_factor_low,2014.0,5376
3,3.0,2.0,1446.0,Low,Single Family Home,22.0,1980.0,27.858340,-82.806344,Seminole,Pinellas County,X (),flood_factor_low,2020.0,3848
4,3.0,2.5,1815.0,Medium,Single Family Home,190.0,1999.0,27.273789,-80.241492,Jensen Beach,St. Lucie County,X (),flood_factor_low,2013.0,2713
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,3.0,3.0,2396.0,Medium,Single Family Home,121.0,2000.0,27.096781,-80.144033,Hobe Sound,Martin County,X (),flood_factor_high,1999.0,4235
184,3.0,2.0,1404.0,Medium,Single Family Home,9.0,1981.0,27.890649,-82.773214,Largo,Pinellas County,AE (),flood_factor_low,2020.0,6070
185,3.0,3.5,3265.0,Medium,Condo,240.0,2019.0,25.823095,-80.131016,Miami Beach,Miami-Dade County,AE (),flood_factor_high,2019.0,2639
186,3.0,2.0,1858.0,,Single Family Home,3.0,1985.0,27.759939,-80.584307,Fellsmere,Indian River County,A (),flood_factor_low,2020.0,12426


In [15]:
import shutil, os
shutil.rmtree('../data/colorframes')
shutil.rmtree('../data/train')
shutil.rmtree('../data/test')
os.mkdir('../data/colorframes')
os.mkdir('../data/train')
os.mkdir('../data/test')

In [16]:
#create a colorframe sample to test that things work as expected
s = pd.DataFrame({})
s['indx'] = [indx for indx in [35]]
bi = build_image(X_train, y_train, '../data/colorframes/')
s['indx'].apply(bi.run)

0    None
Name: indx, dtype: object

In [17]:
s = pd.DataFrame({})
s['indx'] = [indx for indx in y_train.index]
bi = build_image(X_train, y_train, '../data/train/')
s['indx'].apply(bi.run)

0      None
1      None
2      None
3      None
4      None
       ... 
745    None
746    None
747    None
748    None
749    None
Name: indx, Length: 750, dtype: object

In [18]:
s = pd.DataFrame({})
s['indx'] = [indx for indx in y_test.index]
bi = build_image(X_test, y_test, '../data/test/')
s['indx'].apply(bi.run)

0      None
1      None
2      None
3      None
4      None
       ... 
183    None
184    None
185    None
186    None
187    None
Name: indx, Length: 188, dtype: object

In [32]:
directory = '../data/train'
dirr = ['../data/train/'+filename for filename in os.listdir(directory) if filename.endswith('.png')]
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array

train_images = []
test_images = []
train_yy = []
test_yy = []

for d in dirr:
    im = load_img(d)
    train_images.append(img_to_array(im))
    train_yy.append(round(float(d.split('_')[2]),4))
    
    
directory = '../data/test'
dirr = ['../data/test/'+filename for filename in os.listdir(directory) if filename.endswith('.png')]

for d in dirr:
    im = load_img(d)
    test_images.append(img_to_array(im))
    test_yy.append(round(float(d.split('_')[2]),4))

## Part 1. Create model and view TensorBoard in notebook

In [33]:
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.applications import MobileNet, VGG16, ResNet50, Xception, MobileNetV2
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Activation, Dense, Flatten, MaxPooling2D, Dropout
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

# import python.keras.engine
from tensorflow.keras.layers import Layer, InputSpec
from tensorflow.keras.utils import get_file
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import preprocess_input

### Create callbacks

In [34]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# In the following lines, replace <username> with your username.
experiment_log_dir = "../data/tb"
checkpoint_path = "../data/keras_checkpoint_weights.ckpt"

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=experiment_log_dir)
model_checkpoint = ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor="loss", mode="min", patience=3)


### TensorBoard commands for Databricks Runtime 7.2 ML and above

When you start TensorBoard this way, it continues to run until you detach the notebook from the cluster.  
Note: to clear the TensorBoard between runs, use this command: `dbutils.fs.rm(experiment_log_dir.replace("/dbfs",""), recurse=True)`

In [35]:
%load_ext tensorboard
%tensorboard --logdir $experiment_log_dir

Reusing TensorBoard on port 6006 (pid 35748), started 7:20:12 ago. (Use '!kill 35748' to kill it.)

## Part 2. Hyperparameter tuning with Hyperopt and MLflow
[Hyperopt](https://github.com/hyperopt/hyperopt) is a Python library for hyperparameter tuning. Databricks Runtime for Machine Learning includes an optimized and enhanced version of Hyperopt, including automated MLflow tracking. For more information about using Hyperopt, see the [Hyperopt documentation](https://github.com/hyperopt/hyperopt/wiki/FMin).

### Create neural network model using variables for number of nodes in hidden layers

In [36]:
pname=''
mname = 'imgframe'

# Set the dimension of the images 
img_width = 299 
img_height = 299

#Create a bottleneck file
model_path = '../models/mname.h5'.replace('mname',mname)
json_path = '../models/mname.json'.replace('mname',mname)
mlmodel_path = '../models/mname.mlmodel'.replace('mname',mname)

# loading up our datasets
train_data_dir = '../data/train'.replace('pname',pname)
validation_data_dir = '../data/test'.replace('pname',pname) 
test_data_dir = '../data/test'.replace('pname',pname)

# TensorFlow is the backend, so ordering of input_shape is as below
input_shape = (img_width, img_height, 3)

In [37]:

def create_model(n):
    # Function to create model
    # def create_model(learn_rate=1e-1, momentum=0.9, dropout_rate=0.0):
    sys.stdout.write('Loading new model\n\n')
    sys.stdout.flush()
    base_model = Xception(input_shape=input_shape, 
                        include_top=False, 
                        pooling='avg', 
                        weights='imagenet')

    for layer in base_model.layers:
        layer.trainable = False

    x2 = Dropout(n["dropout_rate"])(base_model.layers[-1].output)
    x = Dense(1, activation='linear', name='classLabels')(x2)
    model = Model(base_model.input, x)
    model.summary()

    # Save model
    with open(model_path, 'w') as outfile:
        json.dump(model.to_json(), outfile)
        outfile.close()

    return model


### Create Hyperopt objective function

In [40]:
from hyperopt import fmin, hp, tpe, STATUS_OK, SparkTrials
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping


def runNN(n):
    # Import tensorflow 
    import tensorflow as tf

    # Log run information with mlflow.tensorflow.autolog()
    mlflow.tensorflow.autolog()

    model = create_model(n)

    # Select optimizer
    optimizer_call = getattr(tf.keras.optimizers, n["optimizer"])

    if n["optimizer"] == 'SGD': optimizer = optimizer_call(learning_rate=n["learning_rate"], momentum=n["momentum"])
    else: optimizer = optimizer_call(learning_rate=n["learning_rate"])

    # Compile model
    model.compile(optimizer=optimizer, 
                loss='mean_absolute_error', 
                metrics=['mse', 
                         'mean_absolute_percentage_error', 
                         'mean_absolute_error'])

    # In the following lines, replace <username> with your username.
    experiment_log_dir = "../data/tb"
    checkpoint_path = "../data/keras_checkpoint_weights.ckpt"

    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=experiment_log_dir)
    model_checkpoint = ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_best_only=True)
    early_stopping = EarlyStopping(monitor="val_mean_absolute_error", mode="min", patience=1)

    history = model.fit(np.array(train_images), np.array(train_yy), 
                                  validation_split=.2, 
                                  epochs=n["epochs"], 
                                  verbose=4,
                                  batch_size=n["batch_size"],
                                  callbacks=[tensorboard_callback, model_checkpoint, early_stopping]
                                 )

    # Evaluate the model
    score = model.evaluate(np.array(test_images), np.array(test_yy), verbose=1)
    obj_metric = score[0]  
    return {"loss": obj_metric, "status": STATUS_OK, "model":model}

### Define Hyperopt search space

In [39]:
space = {
  "learning_rate": hp.loguniform("learning_rate", -7, 0),
  "optimizer": hp.choice("optimizer", ["Adam"]),
  "batch_size": hp.choice("batch_size", [2, 4,]),
  "epochs": hp.choice("epochs", [1, 3, 5]),
  "dropout_rate": hp.choice("dropout_rate", [0.0, 0.1, 0.2]),
 }


### Create the `SparkTrials` object

The `SparkTrials` object tells `fmin()` to distribute the tuning job across a Spark cluster. When you create the `SparkTrials` object, you can use the `parallelism` argument to set the maximum number of trials to evaluate concurently. The default setting is the number of Spark executors available.  

A higher number lets you scale-out testing of more hyperparameter settings. Because Hyperopt proposes new trials based on past results, there is a trade-off between parallelism and adaptivity. For a fixed `max_evals`, greater parallelism speeds up calculations, but lower parallelism may lead to better results since each iteration has access to more past results.

In [22]:
# If you do not specify a parallelism argument, the default is the number of available Spark executors 
spark_trials = SparkTrials()

### Perform hyperparameter tuning 
Put the `fmin()` call inside an MLflow run to save results to MLflow. MLflow tracks the parameters and performance metrics of each run.   

After running the following cell, you can view the results in MLflow. Click **Experiment** at the upper right to display the Experiment Runs sidebar. Click the icon at the far right next to **Experiment Runs** to display the MLflow Runs Table.

For more information about using MLflow to analyze runs, see ([AWS](https://docs.databricks.com/applications/mlflow/index.html)|[Azure](https://docs.microsoft.com/azure/databricks/applications/mlflow/)).

In [0]:
mlflow.end_run()

with mlflow.start_run():
    best_hyperparam = fmin(fn=runNN, 
                         space=space, 
                         algo=tpe.suggest, 
                         max_evals=30, 
                         trials=spark_trials)

In [48]:
n = {'batch_size': 4, 'dropout_rate': 0.0, 'epochs': 4, 'learning_rate': 0.001037950994714726, 'momentum': 0.4, 'optimizer': 'Adam'}
model = runNN(n)
model

Loading new model

Model: "functional_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 299, 299, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 149, 149, 32) 864         input_4[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 149, 149, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 149, 149, 32) 0           block1_conv1_bn[0][0]            
____________________________________________________________________

2021/02/28 21:03:16 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '58baf244ed33492f904600a6beb92075', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/4

Epoch 00001: val_loss improved from inf to 2.15134, saving model to ../data/keras_checkpoint_weights.ckpt
INFO:tensorflow:Assets written to: ../data/keras_checkpoint_weights.ckpt/assets
Epoch 2/4

Epoch 00002: val_loss improved from 2.15134 to 1.53903, saving model to ../data/keras_checkpoint_weights.ckpt
INFO:tensorflow:Assets written to: ../data/keras_checkpoint_weights.ckpt/assets
Epoch 3/4

Epoch 00003: val_loss improved from 1.53903 to 1.43144, saving model to ../data/keras_checkpoint_weights.ckpt
INFO:tensorflow:Assets written to: ../data/keras_checkpoint_weights.ckpt/assets
Epoch 4/4

Epoch 00004: val_loss did not improve from 1.43144
INFO:tensorflow:Assets written to: /var/folders/x4/lln_2cf520d7_dbpv40m1jx40000gq/T/tmpslk26gyu/model/data/model/assets


{'loss': 2.769727945327759,
 'status': 'ok',
 'model': <tensorflow.python.keras.engine.functional.Functional at 0x7fbf75c0a350>}

## Part 3. Use the best set of hyperparameters to build a final model

In [0]:
import hyperopt
print(hyperopt.space_eval(space, best_hyperparam))

In [0]:
dropout_rate = hyperopt.space_eval(space, best_hyperparam)["dropout_rate"]
batch_size = hyperopt.space_eval(space, best_hyperparam)["batch_size"]
epochs = hyperopt.space_eval(space, best_hyperparam)["epochs"]
momentum = hyperopt.space_eval(space, best_hyperparam)["momentum"]
learning_rate = hyperopt.space_eval(space, best_hyperparam)["learning_rate"]
optimizer = hyperopt.space_eval(space, best_hyperparam)["optimizer"]

In [0]:
# Get optimizer and update with learning_rate value
optimizer_call = getattr(tf.keras.optimizers, optimizer)
optimizer = optimizer_call(learning_rate=learning_rate)

In [0]:
def create_new_model():
    # Function to create model
    # def create_model(learn_rate=1e-1, momentum=0.9, dropout_rate=0.0):
    sys.stdout.write('Loading new model\n\n')
    sys.stdout.flush()
    base_model = Xception(input_shape=input_shape, 
                        include_top=False, 
                        pooling='avg', 
                        weights='imagenet')

    for layer in base_model.layers:
        layer.trainable = False

    x2 = Dropout(dropout_rate)(base_model.layers[-1].output)
    x = Dense(1, activation='linear', name='classLabels')(x2)
    model = Model(base_model.input, x)
    model.summary()

    # Save model
    with open(model_path, 'w') as outfile:
        json.dump(model.to_json(), outfile)
        outfile.close()

    return model

In [0]:
new_model = create_new_model()
  
# Compile model
new_model.compile(optimizer=optimizer, 
                loss='mean_absolute_error', 
                metrics=['mse', 
                         'mean_absolute_percentage_error', 
                         'mean_absolute_error'])

When `autolog()` is active, MLflow does not automatically end a run. We need to end the run that was started in Cmd 30 before starting and autologging a new run.  
For more information, see https://www.mlflow.org/docs/latest/tracking.html#automatic-logging.

In [0]:
mlflow.end_run()

In [0]:
epochs

In [0]:
import matplotlib.pyplot as plt

mlflow.tensorflow.autolog()

with mlflow.start_run() as run:
  
  history = new_model.fit(np.array(train_images), 
                          np.array(train_yy), 
#                                   validation_split=.2, 
                                  epochs=epochs, 
                                  verbose=1,
                                  batch_size=batch_size,
#                                   callbacks=[tensorboard_callback, 
#                                              model_checkpoint, 
#                                              early_stopping]
                                 )
  
  # Save the run information to register the model later
  kerasURI = run.info.artifact_uri
  
  # Evaluate model on test dataset and log result
  mlflow.log_param("eval_result", 
                   new_model.evaluate(np.array(test_images), 
                                      np.array(test_yy))[0])
  
  # Plot predicted vs known values for a quick visual check of the model and log the plot as an artifact
  keras_pred = new_model.predict(np.array(test_images))
  plt.plot(np.array(test_yy), keras_pred, "o", markersize=2)
  plt.xlabel("observed value")
  plt.ylabel("predicted value")
  plt.savefig("kplot.png")
  mlflow.log_artifact("kplot.png") 

## Part 4. Register the model in MLflow and use the model to make predictions
To learn more about the Model Registry, see ([AWS](https://docs.databricks.com/applications/mlflow/model-registry.html)|[Azure](https://docs.microsoft.com/azure/databricks/applications/mlflow/model-registry)).

In [0]:
model_path = '/dbfs/FileStore/tables/blogs/models/colorframe_model.h5'
json_path = '/dbfs/FileStore/tables/blogs/models/colorframe_model.json'

# new_model.save_weights(model_path)
# print('Saved trained model at %s ' % model_path)

model_json = new_model.to_json()
open(json_path, 'w').write(model_json)

# mlflow.log_model(model_path, 'model_path')
mlflow.log_artifact(json_path, 'json_path')

In [0]:
import time

model_name = "colorframe"
model_uri = kerasURI+"/model"
new_model_version = mlflow.register_model(model_uri, model_name)

# Registering the model takes a few seconds, so add a delay before continuing with the next cell
time.sleep(15)

### Load the model for inference and make predictions

In [0]:
keras_model = mlflow.keras.load_model(f"models:/{model_name}/{new_model_version.version}")

keras_pred = keras_model.predict(np.array(test_images))
keras_pred

In [0]:
# new_model.save_weights(model_path)
new_model.save_weights('/dbfs/FileStore/tables/blogs/h5weights')

In [0]:
new_model.save('/dbfs/FileStore/tables/blogs/h5model')

In [0]:
mlflow.log_artifact('/dbfs/FileStore/tables/blogs/h5model')

In [43]:
preds = np.array([[2.5374541 ], [3.2678769 ], [3.0341623 ], [3.9246533 ], [5.3540707 ], [2.2571735 ], [2.357383 ], [4.071934 ], [2.6663194 ], [2.3384793 ], [3.8936331 ], [3.0537953 ], [2.6437616 ], [5.248788 ], [3.3744543 ], [1.9989566 ], [3.4469922 ], [3.2478435 ], [3.3308902 ], [2.7365532 ], [2.446532 ], [4.16877 ], [4.1584864 ], [3.447427 ], [2.271582 ], [2.7618506 ], [3.147385 ], [3.3469748 ], [4.369869 ], [3.3367028 ], [3.2269151 ], [3.4688303 ], [4.120167 ], [2.078065 ], [4.019703 ], [5.041812 ], [3.50992 ], [2.4590092 ], [4.162942 ], [1.8680092 ], [2.7311177 ], [2.5004175 ], [1.971567 ], [4.066986 ], [3.2532208 ], [3.642503 ], [1.4995602 ], [4.7609386 ], [2.3255336 ], [2.3952088 ], [3.187093 ], [3.4802458 ], [3.7456691 ], [2.8016112 ], [2.1244657 ], [2.9116685 ], [3.7208135 ], [2.9225452 ], [2.7336068 ], [3.376431 ], [3.182278 ], [2.2641835 ], [3.5075967 ], [2.9265978 ], [2.6397583 ], [3.5653908 ], [2.9789968 ], [3.3346128 ], [1.9328927 ], [2.963746 ], [2.76102 ], [3.7848423 ], [1.660465 ], [4.7199183 ], [4.1745787 ], [1.6536111 ], [3.3782952 ], [2.970159 ], [3.5605137 ], [2.9517906 ], [3.2952628 ], [2.273289 ], [4.1876373 ], [1.4010222 ], [3.7049582 ], [3.52909 ], [3.014873 ], [3.9356582 ], [2.880937 ], [3.2932928 ], [4.424321 ], [2.8027363 ], [3.5936809 ], [2.848199 ], [3.0179572 ], [3.2322586 ], [0.0882553 ], [4.9161186 ], [1.4682242 ], [3.0375245 ], [2.7371447 ], [2.922221 ], [1.9005984 ], [3.9673944 ], [1.1328233 ], [2.9814308 ], [2.7574532 ], [4.6011615 ], [4.393426 ], [3.647125 ], [2.4305787 ], [2.0360255 ], [2.0554073 ], [3.584027 ], [3.1583822 ], [3.253896 ], [2.9442368 ], [2.8312666 ], [3.488383 ], [3.4391968 ], [3.9078705 ], [2.7853186 ], [3.2216456 ], [2.7905474 ], [3.0011642 ], [3.3205035 ], [2.586902 ], [3.9559863 ], [3.0187619 ], [3.047871 ], [2.397868 ], [3.2428887 ], [3.5078952 ], [2.1818392 ], [2.5248778 ], [2.4667773 ], [3.030227 ], [3.248349 ], [4.819742 ], [3.047447 ], [3.5179374 ], [2.1998146 ], [0.6026546 ], [3.8684669 ], [3.1930377 ], [1.8060614 ], [3.0088873 ], [1.4026772 ], [3.4012291 ], [2.3242316 ], [4.1493683 ], [3.7922447 ], [2.4385438 ], [3.0675828 ], [2.279734 ], [3.072283 ], [3.7030537 ], [4.359474 ], [2.5870302 ], [3.1755843 ], [4.4895167 ], [1.8151201 ], [2.350835 ], [3.2439404 ], [2.6312222 ], [4.2802467 ], [3.1646845 ], [2.29786 ], [2.5932682 ], [2.7738147 ], [3.1757994 ], [3.527126 ], [2.1753094 ], [3.2313006 ], [0.87366164], [1.1633948 ], [2.543244 ], [4.0536847 ], [4.5483127 ], [2.8635612 ], [1.0507253 ], [3.1183274 ], [2.4314158 ], [3.8431022 ], [2.538572 ], [2.2237988 ], [4.3369594 ], [2.5413868 ]], dtype=float)
preds

array([[2.5374541 ],
       [3.2678769 ],
       [3.0341623 ],
       [3.9246533 ],
       [5.3540707 ],
       [2.2571735 ],
       [2.357383  ],
       [4.071934  ],
       [2.6663194 ],
       [2.3384793 ],
       [3.8936331 ],
       [3.0537953 ],
       [2.6437616 ],
       [5.248788  ],
       [3.3744543 ],
       [1.9989566 ],
       [3.4469922 ],
       [3.2478435 ],
       [3.3308902 ],
       [2.7365532 ],
       [2.446532  ],
       [4.16877   ],
       [4.1584864 ],
       [3.447427  ],
       [2.271582  ],
       [2.7618506 ],
       [3.147385  ],
       [3.3469748 ],
       [4.369869  ],
       [3.3367028 ],
       [3.2269151 ],
       [3.4688303 ],
       [4.120167  ],
       [2.078065  ],
       [4.019703  ],
       [5.041812  ],
       [3.50992   ],
       [2.4590092 ],
       [4.162942  ],
       [1.8680092 ],
       [2.7311177 ],
       [2.5004175 ],
       [1.971567  ],
       [4.066986  ],
       [3.2532208 ],
       [3.642503  ],
       [1.4995602 ],
       [4.760

In [44]:
p = []
for pred in preds:
    p.append(pred[0])
p

[2.5374541,
 3.2678769,
 3.0341623,
 3.9246533,
 5.3540707,
 2.2571735,
 2.357383,
 4.071934,
 2.6663194,
 2.3384793,
 3.8936331,
 3.0537953,
 2.6437616,
 5.248788,
 3.3744543,
 1.9989566,
 3.4469922,
 3.2478435,
 3.3308902,
 2.7365532,
 2.446532,
 4.16877,
 4.1584864,
 3.447427,
 2.271582,
 2.7618506,
 3.147385,
 3.3469748,
 4.369869,
 3.3367028,
 3.2269151,
 3.4688303,
 4.120167,
 2.078065,
 4.019703,
 5.041812,
 3.50992,
 2.4590092,
 4.162942,
 1.8680092,
 2.7311177,
 2.5004175,
 1.971567,
 4.066986,
 3.2532208,
 3.642503,
 1.4995602,
 4.7609386,
 2.3255336,
 2.3952088,
 3.187093,
 3.4802458,
 3.7456691,
 2.8016112,
 2.1244657,
 2.9116685,
 3.7208135,
 2.9225452,
 2.7336068,
 3.376431,
 3.182278,
 2.2641835,
 3.5075967,
 2.9265978,
 2.6397583,
 3.5653908,
 2.9789968,
 3.3346128,
 1.9328927,
 2.963746,
 2.76102,
 3.7848423,
 1.660465,
 4.7199183,
 4.1745787,
 1.6536111,
 3.3782952,
 2.970159,
 3.5605137,
 2.9517906,
 3.2952628,
 2.273289,
 4.1876373,
 1.4010222,
 3.7049582,
 3.52909,

In [45]:
from sklearn.metrics import mean_absolute_error

mean_absolute_error(np.array(test_yy), p)

3.1269428326595747