# Customer propensity to purchase

In [1]:
# default_exp train

# Prepare data 

## Prepare bucket

In [1]:
#hide
import os
import boto3

mlflow_bucket_name="mlflow"
s3_client = boto3.client('s3', endpoint_url=os.environ['MLFLOW_S3_ENDPOINT_URL'])
s3_client.create_bucket(Bucket=mlflow_bucket_name)

{'ResponseMetadata': {'RequestId': '168F4EA0EC2DCE51',
  'HostId': '',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '0',
   'content-security-policy': 'block-all-mixed-content',
   'location': '/mlflow',
   'server': 'MinIO',
   'vary': 'Origin',
   'x-amz-request-id': '168F4EA0EC2DCE51',
   'x-xss-protection': '1; mode=block',
   'date': 'Tue, 06 Jul 2021 20:47:23 GMT'},
  'RetryAttempts': 0},
 'Location': '/mlflow'}

In [29]:
#export
import os
import pandas as pd
import warnings
import sys
import numpy as np
import joblib
import tensorflow as tf
import fileinput
import sklearn
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

## Train and load to mlflow  

### Train, Save and Evaluate 

In [47]:
#export

classes = []

for file in os.listdir('./'):
    if file.endswith(".csv"):
        classes.append(os.path.splitext(file)[0])

classes.sort()

num_classes = len(classes)

input_list = []
output_list = []


e=np.eye(3)
for index in range(num_classes):
    class_file = classes[index]
    df = pd.read_csv(f"{class_file}.csv")
    input_list.append(df)
    #output_list.append(np.full((df.shape[0],1),index))
    output_list.append(np.full((df.shape[0],num_classes),e[index]))
    
    
X = pd.concat(input_list, axis=0).to_numpy()
y = np.vstack(output_list)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

In [54]:
#export
model = tf.keras.Sequential([
  layers.Dense(8, activation='relu'),
  layers.Dense(5, activation='relu'),
  layers.Dense(num_classes, activation='softmax'),
])

model.compile(optimizer='adam', loss='mse', metrics=['mae','accuracy'])
history = model.fit(X_train, y_train, epochs=400, batch_size=4, validation_data=(X_val, y_val))

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

In [57]:
#export
loss, mae, accuracy = model.evaluate(X_test, y_test)
print("Accuracy", accuracy)
print("MAE", mae)

Accuracy 1.0
MAE 1.226873678206175e-06


In [72]:
#export
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open('model.tflite','wb') as f:
    f.write(tflite_model)

with open('model.h','w') as f:
    f.write('const unsigned char model[] = {\n')
    f.write(','.join([hex(i).ljust(4, '0') for i in tflite_model])) 
    f.write('};')
    
arduino_env = [
    "Arduino_TensorFlowLite@2.4.0-ALPHA",
    "Arduino_APDS9960@1.0.3",
    "Arduino_HTS221@1.0.0",
    "Arduino_LPS22HB@1.0.1",
    "Arduino_LSM9DS1@1.1.0",
    "arduinoFFT@1.5.6"
]

with open('requirements.ino.txt','w') as f:
    for env in arduino_env:
        f.write(f"{env}\n")
        
artifacts = {
    "tfmodel": 'model.tflite',
    "model_header": "model.h",
    "requirements.ino.txt": "requirements.ino.txt",
    "artifacts": "artifacts.ino"
}

INFO:tensorflow:Assets written to: /tmp/tmpenqnd4sh/assets


INFO:tensorflow:Assets written to: /tmp/tmpenqnd4sh/assets
2021-07-06 22:32:06.043719: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2021-07-06 22:32:06.043934: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-07-06 22:32:06.045915: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.014ms.
  function_optimizer: function_optimizer did nothing. time = 0.004ms.

2021-07-06 22:32:06.106396: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.
2021-07-06 22:32:06.106474: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.


In [64]:
#hide
!echo "const unsigned char model[] = {" > ./model1.h
!cat model.tflite | xxd -i      >> ./model1.h
!echo "};"                              >> ./model1.h

### Custom MLflow model wrapper

In [75]:
#export 
import mlflow.pyfunc
class ArduinoWrapper(mlflow.pyfunc.PythonModel):
    def load_context(self, context): ...
    def predict(self, context, model_input): ...

### Conda environment 

In [74]:
#export 
from sys import version_info
import sklearn
import cloudpickle

PYTHON_VERSION = f"{version_info.major}.{version_info.minor}.{version_info.micro}"

conda_env = {
    'channels': ['defaults'],
    'dependencies': [
        f'python={PYTHON_VERSION}',
        'pip',
        {
            'pip':[
                'mlflow',
                f'scikit-learn=={sklearn.__version__}',
                f'tensorflow=={tf.__version__}'
            ]
        }
    ],
    'name': 'serving_propensity_model'
}

In [78]:
#export
import os
os.environ['MLFLOW_TRACKING_URI']='http://mlflow:5000'

import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn
import mlflow.pyfunc

#conda_env=mlflow.pyfunc.get_default_conda_env()

with mlflow.start_run():
    
    #mlflow.log_param("var_smoothing", input_params['var_smoothing'])
    mlflow.log_metric("Accuracy", accuracy)
    mlflow.log_metric("MAE", mae)
    
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

    if tracking_url_type_store != "file":
        mlflow.pyfunc.log_model("model",
                                 registered_model_name="arduino_model",
                                 python_model=ArduinoWrapper(),
                                 artifacts=artifacts,
                                 conda_env=conda_env)
    else:
        mlflow.pyfunc.log_model("model",
                                 path=my_model_path,
                                 python_model=ArduinoWrapper(),
                                 artifacts=artifacts,
                                 conda_env=conda_env)

Successfully registered model 'arduino_model'.
2021/07/06 22:35:17 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: arduino_model, version 1
Created version '1' of model 'arduino_model'.


## Export train code 

The above code will be exported to the python file using nbdev library (export, hide, default_exp keyworkd are needed ) 

In [1]:
#hide
from nbdev.export import *
notebook2script()

Converted mlflow_arduino.ipynb.


## Train from command using mlflow

In [2]:
!mlflow run . --no-conda --experiment-name="arduino"

2021/07/07 19:43:06 INFO mlflow.projects.utils: === Created directory /tmp/tmpnjbpz21o for downloading remote URIs passed to arguments of type 'path' ===
2021/07/07 19:43:06 INFO mlflow.projects.backend.local: === Running command 'python3 ./arduino/train.py' in run with ID 'b017ded03b3f4dd780d0437293b7f344' === 
2021-07-07 19:43:07.969535: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-07-07 19:43:07.969680: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-07-07 19:43:12.504953: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-07-07 19:43:12.504998: W tensorflow/stream_executor/cuda/cuda_driver.cc:3