# AI ENTERPRISE WORKFLOW CERTIFICATION

### Capstone Project - Part 3.  Model Production

### Outline:

1. Build a draft version of an API with train, predict, and logfile endpoints.
2. Using Docker, bundle your API, model, and unit tests.
3. Using test-driven development, iterate on your API in a way that anticipates scale, load, and drift.
4. Create a post-production analysis script that investigates the relationship between model performance and the business metric.
5. Articulate your summarized findings in a final report.



At a high level you are being asked to:
1. Ready your model for deployment.
2. Query your API with new data and test your monitoring tools.
3. Compare results to the gold standard.


## Create an API with FLASK

In [None]:
%%writefile app.py

import argparse
from flask import Flask, jsonify, request
from flask import render_template
import joblib
import socket
import json
import numpy as np
import pandas as pd
import os

## import model specific functions and variables
from project_setup import *
from data_modelling import *
from logger import *

app = Flask(__name__)


# Predict 
@app.route('/predict', methods=['GET','POST'])
def predict():
    """
    basic predict function for the API

    Expects a JSON body with the keys 'country', 'year', 'month' and 'day',
    which are forwarded to model_predict.

    Returns the output of model_predict, converted with .tolist(), as JSON.
    An empty JSON list is returned when no JSON body was received and JSON
    false when one of the required keys is missing.
    """

    ## input checking
    ## silent=True yields None for a missing or non-JSON body instead of
    ## letting Flask reject the request, so the error branch below handles it
    payload = request.get_json(silent=True)
    if not payload:
        print("ERROR: API (predict): did not receive request data")
        return jsonify([])

    ## keys are checked in a fixed order so the first missing one is reported
    for key in ('country', 'year', 'month', 'day'):
        if key not in payload:
            print("ERROR API (predict): received request, but no '{}' found within".format(key))
            return jsonify(False)

    ## predict
    result = model_predict(year=payload['year'],
                           month=payload['month'],
                           day=payload['day'],
                           country=payload['country'],
                          )

    ## convert numpy objects to ensure they are serializable
    result = result.tolist()

    return(jsonify(result))

# Train 
@app.route('/train', methods=['GET','POST'])
def train():
    """
    basic train function for the API

    Expects a JSON body containing a 'test' key. The key is only checked for
    presence; model_train is called without arguments.

    Returns JSON true once training has finished and JSON false when the
    request body or the 'test' key is missing.
    """

    ## silent=True yields None for a missing or non-JSON body instead of
    ## letting Flask reject the request, so the error branch below handles it
    payload = request.get_json(silent=True)
    if not payload:
        print("ERROR: API (train): did not receive request data")
        return jsonify(False)

    if 'test' not in payload:
        print("ERROR API (train): received request, but no 'test' found within")
        return jsonify(False)

    ## NOTE(review): the 'test' value is not forwarded to model_train -- confirm
    ## whether it should toggle between a test and a production training run
    print("... training model")
    model_train()
    print("... training complete")

    return(jsonify(True))

# Log
@app.route('/logging', methods=['GET','POST'])
def load_logs():
    """
    basic logging function for the API

    Expects a JSON body with the keys 'env', 'year' and 'month', which are
    forwarded to log_load.

    Returns a JSON object {"logfile": <value returned by log_load>}, or JSON
    false when the request body or one of the required keys is missing.
    """

    ## silent=True yields None for a missing or non-JSON body instead of
    ## letting Flask reject the request, so the error branch below handles it
    payload = request.get_json(silent=True)
    if not payload:
        ## message names this endpoint (it used to say "train")
        print("ERROR: API (log): did not receive request data")
        return jsonify(False)

    ## keys are checked in a fixed order so the first missing one is reported
    for key in ('env', 'month', 'year'):
        if key not in payload:
            print("ERROR API (log): received request, but no '{}' found within".format(key))
            return jsonify(False)

    print("... fetching logfile")
    logfile = log_load(env=payload['env'],
                       year=payload['year'],
                       month=payload['month'])

    return(jsonify({"logfile": logfile}))



if __name__ == '__main__':

    ## command-line switch that selects the flask debug server
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action="store_true", help="debug flask")
    cli_options = vars(parser.parse_args())

    if not cli_options["debug"]:
        ## default: listen on all interfaces with threading enabled
        app.run(host='0.0.0.0', threaded=True, port=8080)
    else:
        app.run(debug=True, port=8080)

#### Run the app 

To run the FLASK App - go into the directory where app.py is stored and run with:
```
python3 app.py
```


### Test the API

In [None]:
## API predict
import requests
from ast import literal_eval

## JSON body sent to the /predict endpoint
query = {"year":"2018","month":"2","day":"11","country":"all"}
port = 8080
r = requests.post(f'http://localhost:{port}/predict',json=query)
## decode the body as JSON; literal_eval cannot read JSON literals such as
## false, which the endpoint returns when a required key is missing
response = r.json()
print(response)

In [None]:
## API train -- post a training request to the running service
port = 8080
query = {"ts_dir":"TS_DIR", "test":"False"}
r = requests.post(f'http://localhost:{port}/train', json=query)
r.text

In [None]:
## API Log
query = {"env":"train","year":"2021","month":"04"}
port = 8080
r = requests.post('http://localhost:{}/logging'.format(port),json=query)
## decode the body as JSON; literal_eval cannot read JSON literals such as
## false, which the endpoint returns when the request is rejected
response = r.json()
print(response)

## Unit Tests

In [1]:
import os

# Load the directory paths from project_setup
from project_setup import PROJECT_DATA_DIR, UNITTEST_DIR

# create the unit-test directory, including any missing parent directories;
# exist_ok replaces the separate exists() check and its check-then-create race
os.makedirs(UNITTEST_DIR, exist_ok=True)


In [2]:
%%writefile runtime/unittests/__init__.py

import unittest
import getopt
import sys
import os

## parse inputs
try:
    optlist, args = getopt.getopt(sys.argv[1:],'v')
except getopt.GetoptError as err:
    ## report the actual parsing problem rather than the exception class
    print(err)
    print(sys.argv[0] + " -v")
    print("... the verbose flag (-v) may be used")
    ## non-zero status signals the usage error to the caller
    sys.exit(2)

VERBOSE = False
RUNALL = False

## add this directory to the import path so the test modules next to this
## file can be imported by their bare names
sys.path.append(os.path.realpath(os.path.dirname(__file__)))

for o, a in optlist:
    if o == '-v':
        VERBOSE = True

## one loader instance builds every suite below
_loader = unittest.TestLoader()

## api tests
from ApiTests import *
ApiTestSuite = _loader.loadTestsFromTestCase(ApiTest)

## model tests
from ModelTests import *
ModelTestSuite = _loader.loadTestsFromTestCase(ModelTest)

## logger tests
from LoggerTests import *
LoggerTestSuite = _loader.loadTestsFromTestCase(LoggerTest)

## combined suite: logger tests first, then model tests, then api tests
_ordered_suites = [LoggerTestSuite, ModelTestSuite, ApiTestSuite]
MainSuite = unittest.TestSuite(_ordered_suites)


Overwriting runtime/unittests/__init__.py


#### Generate Unit Tests

In [3]:
%%writefile runtime/unittests/ModelTests.py
#!/usr/bin/env python

"""
model tests
"""

import unittest
from project_setup import PROJECT_DATA_DIR, UNITTEST_DIR, MODEL_DIR, TEST, TS_DIR

from data_modelling import *

class ModelTest(unittest.TestCase):
    """
    test the essential functionality of the model module

    The tests call model_train, model_load and model_predict from
    data_modelling and use the paths and flags imported from project_setup.
    """

    def test_01_train(self):
        """
        test the train functionality
        """
        ## imported here so the test does not depend on these names leaking
        ## through the star import of data_modelling
        import os
        import re

        print("Test: Model-Train")

        ## train the model
        model_train(TS_DIR, TEST)

        ## count the saved model files whose name contains the run tag
        prefix = 'test' if TEST else 'sl'
        models = [f for f in os.listdir(MODEL_DIR) if re.search(prefix,f)]
        ## NOTE(review): 11 presumably means one model file per trained
        ## country -- confirm against the training data
        self.assertEqual(len(models),11)

    def test_02_load(self):
        """
        test the load functionality
        """
        print("Test: Model-Load")

        ## load the models
        models = model_load()

        ## every loaded model must offer the estimator interface used here
        for model in models.values():
            self.assertIn("predict", dir(model))
            self.assertIn("fit", dir(model))

    def test_03_predict(self):
        """
        test the predict function input
        """
        ## imported here so the test does not depend on the star import
        import numpy as np

        print("Test: Model-Predict-Input")

        ## query inputs: year, month, day, country
        query = ["2018", "1", "5", "all"]

        y_pred = model_predict(year=query[0], month=query[1], day=query[2], country=query[3])
        self.assertEqual(y_pred.dtype, np.float64)

    def test_04_predict(self):
        """
        test the predict function on several example queries

        NOTE(review): only the dtype of the prediction is checked; no
        accuracy threshold is asserted.
        """
        ## imported here so the test does not depend on the star import
        import numpy as np

        print("Test: Model-Predict-Accuracy")

        ## example queries: year, month, day, country
        example_queries = [["2018", "11", "02", "all"],
                           ["2019", "01", "01", "EIRE"],
                           ["2018", "03", "05", "all"]]

        for query in example_queries:
            y_pred = model_predict(year=query[0], month=query[1], day=query[2], country=query[3])
            self.assertEqual(y_pred.dtype, np.float64)
        
## run the tests when this file is executed as a script
if __name__ == "__main__":
    unittest.main()

Overwriting runtime/unittests/ModelTests.py


In [4]:
%run runtime/unittests/ModelTests.py

Test: Model-Train
Ingesting timeseries data from files.
...training model for EIRE
The best model for EIRE is RFR.
...training model for Hong Kong
The best model for Hong Kong is RFR.
...training model for Germany
The best model for Germany is RFR.
...training model for Netherlands
The best model for Netherlands is XGB.
...training model for France
The best model for France is RFR.
...training model for Singapore
The best model for Singapore is ADA.
...training model for Spain
The best model for Spain is GBR.
...training model for all
The best model for all is GBR.
...training model for United Kingdom
The best model for United Kingdom is XGB.
...training model for Norway
The best model for Norway is DTR.
...training model for Portugal
The best model for Portugal is XGB.


..

Test: Model-Load
...Loading models
Test: Model-Predict-Input
Ingesting timeseries data from files.


.

...Loading models
2018-01-05
Test: Model-Predict-Accuracy
Ingesting timeseries data from files.
...Loading models
2018-11-02
Ingesting timeseries data from files.
...Loading models
2019-01-01
Ingesting timeseries data from files.


.

...Loading models
2018-03-05



----------------------------------------------------------------------
Ran 4 tests in 489.467s

OK


In [7]:
%%writefile runtime/unittests/LoggerTests.py
#!/usr/bin/env python
"""
logger tests
"""

import unittest
## import model specific functions and variables
from logger import *

class LoggerTest(unittest.TestCase):
    """
    checks that the monthly log files exist and can be loaded
    """

    def _current_log_path(self, env):
        """
        path of the current month's log file for the given environment tag
        """
        now = date.today()
        filename = "{}-{}-{}.log".format(env, now.year, now.month)
        return os.path.join(LOG_DIR, filename)

    def test_01_train(self):
        """
        the train log of the current month must exist
        """
        train_log = self._current_log_path("train")
        self.assertTrue(os.path.exists(train_log))

    def test_02_predict(self):
        """
        the predict log of the current month must exist
        """
        predict_log = self._current_log_path("predict")
        self.assertTrue(os.path.exists(predict_log))

    def test_03_load(self):
        """
        the April 2021 train log must split into more than two lines
        """

        ## log_load returns the name of the log file inside LOG_DIR
        name = log_load(env = 'train',year=2021,month=4, verbose=False)
        with open(os.path.join(LOG_DIR, name), "r") as handle:
            pieces = handle.read().split("\n")
        self.assertTrue(len(pieces)>2)
        
        
### Run the logger tests when this file is executed as a script
if __name__ == '__main__':
    unittest.main()

Overwriting runtime/unittests/LoggerTests.py


In [8]:
%run runtime/unittests/LoggerTests.py

...
----------------------------------------------------------------------
Ran 3 tests in 0.003s

OK


In [1]:
%%writefile runtime/unittests/ApiTests.py
#!/usr/bin/env python
"""
api tests

these tests use the requests package however similar requests can be made with curl

e.g.
data = '{"key":"value"}'
curl -X POST -H "Content-Type: application/json" -d "%s" http://localhost:8080/predict'%(data)
"""

import sys
import os
import unittest
import requests
import re
from ast import literal_eval
import numpy as np
import pandas as pd

port = 8080

## probe the server once at import time so the tests can be skipped when it is down
try:
    requests.post('http://localhost:{}/predict'.format(port))
    server_available = True
except requests.exceptions.RequestException:
    ## only request-level failures mean "server not running"; a bare except
    ## would also swallow KeyboardInterrupt and programming errors
    server_available = False
    
## test class for the main window function
class ApiTest(unittest.TestCase):
    """
    test the essential functionality of the API endpoints

    Every test talks to the server on localhost and is skipped when the
    import-time probe found no running server.
    """

    @unittest.skipUnless(server_available,"local server is not running")
    def test_predict(self):
        """
        test the predict functionality
        """

        query = {"year":"2019","month":"2","day":"1","country":"all"}
        r = requests.post('http://localhost:{}/predict'.format(port),json=query)
        ## the endpoint answers with JSON, so decode it as JSON
        response = r.json()
        self.assertIsInstance(response[0], float)

    @unittest.skipUnless(server_available,"local server is not running")
    def test_train(self):
        """
        test the train functionality
        """

        query = {"ts_dir":"TS_DIR", "test": "False" }
        r = requests.post('http://localhost:{}/train'.format(port),json=query)
        ## the endpoint answers JSON true once training has finished
        self.assertIs(r.json(), True)

    ## skipped like the other tests when no server is running, instead of
    ## failing with a connection error
    @unittest.skipUnless(server_available,"local server is not running")
    def test_logging(self):
        """
        test the logging functionality
        """

        query = {"env":"train","year":"2021","month":"04"}
        r = requests.post('http://localhost:{}/logging'.format(port),json=query)
        response = r.json()
        self.assertEqual(response.get("logfile"),'train-2021-4.log')
    
### Run the API tests when this file is executed as a script
if __name__ == '__main__':
    unittest.main()



Overwriting runtime/unittests/ApiTests.py


In [None]:
%run runtime/unittests/ApiTests.py

### Run all the tests

In [None]:
%%writefile runtime/run-tests.py


import sys
import unittest

## the star import brings the test case classes of the unittests package into
## this namespace -- presumably so that unittest.main() can find them here
from unittests import *
unittest.main()

In [None]:
%run runtime/run-tests.py

## Create Docker file with all necessary libraries

### Setup the docker environment

In [None]:
import os

# Load the docker directory path from project_setup
from project_setup import DOCKER_DIR

# create the docker directory, including any missing parent directories;
# exist_ok replaces the separate exists() check and its check-then-create race
os.makedirs(DOCKER_DIR, exist_ok=True)

### Write the requirements file with the necessary libs for the container runtime

In [None]:
%%writefile runtime/dockerenv/requirements.txt

pandas
numpy
scikit-learn
matplotlib
xgboost
IPython
seaborn
# jsonify ships with flask, so no separate package is listed for it
flask
# imported directly by app.py
joblib
# used by the API unit tests
requests

In [None]:
%%writefile runtime/dockerenv/Dockerfile

# Use an official Python runtime as a parent image
FROM python:3.7.5-stretch

RUN apt-get update && apt-get install -y \
python3-dev \
build-essential

# Set the working directory to /app
WORKDIR /app

# Copy the project into the container at /app.
# Docker cannot read paths above the build context, so build from the
# project root: docker build -f runtime/dockerenv/Dockerfile -t <tag> .
COPY . /app

# Install any needed packages specified in requirements.txt
RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r runtime/dockerenv/requirements.txt

# Make port 8080 (the port app.py listens on) available outside this container
EXPOSE 8080

# Define environment variable
ENV NAME=World

# Run app.py when the container launches
CMD ["python", "app.py"]