### Requisite Imports

In [1]:
import os
import warnings
import project_lib
import logging
from project_lib import Project
project_lib.utils.logger.get_logger().setLevel(logging.ERROR)
warnings.filterwarnings("ignore")

from clean_transform import *
from cpd_utils import *
from train_model import *

### Read the data from the TSV data assets (alternative to the Oracle cell below, which assigns the same variables)

In [3]:
# Every source table is a tab-separated file in which '?' marks a missing value,
# so the parsing options are declared once and shared by all six reads.
tsv_options = {'sep': '\t', 'na_values': '?'}

customer        = pd.read_csv('/project_data/data_asset/customers.tsv', **tsv_options)
discount_policy = pd.read_csv('/project_data/data_asset/discount_policy.tsv', **tsv_options)
audit           = pd.read_csv('/project_data/data_asset/offer_audit.tsv', **tsv_options)
offer           = pd.read_csv('/project_data/data_asset/offers.tsv', **tsv_options)
oppo            = pd.read_csv('/project_data/data_asset/opportunities.tsv', **tsv_options)
shipment        = pd.read_csv('/project_data/data_asset/shipment_profile.tsv', **tsv_options)

### Read the data from the Oracle tables (alternative to the TSV cell above; assigns the same variables)

In [2]:
# One "SELECT *" statement per source table in the HR schema.
query = ['SELECT * FROM HR.CUSTOMERS','SELECT * FROM HR.DISCOUNT_POLICY','SELECT * FROM HR.OFFER_AUDIT',
         'SELECT * FROM HR.OPPORTUNITIES','SELECT * FROM HR.SHIPMENT_PROFILE','SELECT * FROM HR.OFFER']
# master_df is indexed by bare table name below, so connect_to_oracle_db presumably
# returns a mapping keyed that way -- TODO confirm against its definition.
master_df = connect_to_oracle_db(query)
# These rebind the same six names that the TSV-loading cell assigns; whichever of the
# two cells ran last determines what transform() receives.
customer        = col_handler(master_df['CUSTOMERS'])
discount_policy = col_handler(master_df['DISCOUNT_POLICY'])
audit           = col_handler(master_df['OFFER_AUDIT'])
offer           = col_handler(master_df['OFFER'])
oppo            = col_handler(master_df['OPPORTUNITIES'])
shipment        = col_handler(master_df['SHIPMENT_PROFILE'])

### Clean and Transform the data from various Tables

In [3]:
# The return value is not captured. Later cells read
# /project_data/data_asset/trn_dataset_transformed.tsv, so transform() presumably
# writes that file as a side effect -- TODO confirm.
transform(customer,discount_policy,audit,offer,oppo,shipment)

### Save the Transformed file as a data asset

In [6]:
# NOTE(review): `project` is not referenced again in the cells shown here; check
# whether add_file_path_as_data_asset depends on it before removing the assignment.
project = Project.access()
# Registers the transformed training file as a project data asset.
add_file_path_as_data_asset("/project_data/data_asset/trn_dataset_transformed.tsv")

File Added as Data Assset


### Evaluation Set

In [7]:
# Builds eval_mdd.tsv from the transformed training file. Per the cell output the
# result is also registered as a data asset. This file is later promoted to the
# deployment space and used to build the scoring payloads.
create_eval_set(path="/project_data/data_asset/trn_dataset_transformed.tsv",outfile="/project_data/data_asset/eval_mdd.tsv")

File Added as Data Assset
Evaluation Set /project_data/data_asset/eval_mdd.tsv Created


### Use the Transformed Data to Train the Model

In [4]:
# Trains the candidate models on the transformed data (the log below shows gradient
# boosting, random forest and a neural network). The result is ranked by
# fetch_best_performer in a later cell.
model_lists = main(data_path="/project_data/data_asset/trn_dataset_transformed.tsv")

[2020-05-25 05:06:42,129]: ----- Retrieving Cleaned Training Data -----
[2020-05-25 05:06:42,161]: ----- Training DHL CL -----
[2020-05-25 05:06:42,164]: ----- Data Proprocessing: Checking E-Commerce -----
[2020-05-25 05:06:42,172]: ----- Data Proprocessing: Getting Log Values -----
[2020-05-25 05:06:42,199]: ----- Data Proprocessing: Getting Product Mix Ratios -----
[2020-05-25 05:06:42,229]: ----- Data Proprocessing: Mapping Categorical Data -----
[2020-05-25 05:06:42,241]: ----- Data Proprocessing: Pruning Data -----
[2020-05-25 05:06:42,362]: ----- Define Training and Test Data -----
[2020-05-25 05:06:42,370]: ----- Training 01_gradient_boosting -----
[2020-05-25 05:06:42,372]: ----- Creating Model Pipelines -----
[2020-05-25 05:06:42,374]: ----- Training Model Weights -----


01_gradient_boosting


[2020-05-25 05:07:18,936]: ----- Testing Trained Model -----
[2020-05-25 05:07:18,986]: ----- Calculating Model Metrics -----
[2020-05-25 05:07:18,992]: ----- Training 02_random_forest -----
[2020-05-25 05:07:18,994]: ----- Creating Model Pipelines -----
[2020-05-25 05:07:18,995]: ----- Training Model Weights -----


  RMSE: 8.018
  MAE: 5.819
  R2: 0.674
  Explained_Variance: 0.674
  score_test: 0.674
  score_train: 0.737
--- 01_gradient_boosting time ---
--- 36.620174169540405 seconds ---
02_random_forest


[2020-05-25 05:07:30,237]: ----- Testing Trained Model -----
[2020-05-25 05:07:30,371]: ----- Calculating Model Metrics -----
[2020-05-25 05:07:30,376]: ----- Training 03_neural_networks -----
[2020-05-25 05:07:30,378]: ----- Creating Model Pipelines -----
[2020-05-25 05:07:30,379]: ----- Training Model Weights -----


  RMSE: 8.082
  MAE: 5.728
  R2: 0.669
  Explained_Variance: 0.669
  score_test: 0.669
  score_train: 0.822
--- 02_random_forest time ---
--- 11.383373737335205 seconds ---
03_neural_networks


[2020-05-25 05:09:17,934]: ----- Testing Trained Model -----
[2020-05-25 05:09:18,005]: ----- Calculating Model Metrics -----


  RMSE: 8.48
  MAE: 6.126
  R2: 0.635
  Explained_Variance: 0.635
  score_test: 0.635
  score_train: 0.677
--- 03_neural_networks time ---
--- 107.63349890708923 seconds ---
--- total DHL CL time ---
--- 155.84831142425537 seconds ---


### Select the two best-performing models

In [9]:
# Requires `model_lists` from the training cell. Returns two models, which the next
# cell saves as the best and second-best model respectively.
final_model,penul_model = fetch_best_performer(model_lists)

### Save model as an asset in the Project

In [10]:
# Both selected models are persisted; the best one is saved first.
save_model_data_asset(model=final_model) # best model
save_model_data_asset(model=penul_model) # second-best model

### Get the latest model

In [11]:
# Looks for *.model files among the project data assets; the bare name on the last
# line displays the selected path as the cell output.
latest_model_file_name = get_latest_model_file(path="/project_data/data_asset/*.model")
latest_model_file_name

In [12]:
# Resolves the asset id from the model's file name (directory part stripped).
# NOTE(review): `asset_id` is not used by the later cells shown here -- the deployment
# parameters cell uses hard-coded asset ids instead.
asset_id = get_asset_id(os.path.basename(latest_model_file_name))
asset_id

### Define the WML API Credentials

In [13]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

# `os` is already imported in the notebook's first cell, so it is not re-imported here.
# The access token and endpoint come from the runtime environment rather than being
# hard-coded, which keeps secrets out of the notebook.
token = os.environ['USER_ACCESS_TOKEN']

wml_credentials = {
    "token": token,
    "instance_id": "openshift",
    "url": os.environ['RUNTIME_ENV_APSX_URL'],
    "version": "2.5.0",
}

client = WatsonMachineLearningAPIClient(wml_credentials)

# Id of the current project; used later when storing the function in the project
# and passed along in the deployment parameters.
project_uid = os.environ['PROJECT_ID']

### Create Deployment Space

In [14]:
# space_meta = {
#     client.spaces.ConfigurationMetaNames.NAME:"DHL_Staging"
# }
# spaces_details = client.spaces.store(space_meta)
# spaces_details

In [15]:
# Show up to five deployment spaces; the NAME column is what guid_from_space_name
# matches against further down.
client.spaces.list(limit=5)

------------------------------------  --------------------  ------------------------
GUID                                  NAME                  CREATED
3b41a3c1-8671-4154-ac1e-58164a6550ff  DHL_Staging           2020-04-23T13:06:52.675Z
f1d6281f-c594-43e6-a8dc-9be486d0b2b4  DHL_deployment_space  2020-04-17T04:09:08.541Z
------------------------------------  --------------------  ------------------------


### Fetch GUID

In [16]:
def guid_from_space_name(client, space_name):
    """Return the GUID of the deployment space whose name equals ``space_name``.

    Args:
        client: WML API client; only ``client.spaces.get_details()`` is used.
        space_name: Exact (case-sensitive) name of the deployment space.

    Returns:
        The ``metadata.guid`` of the first space whose ``entity.name`` matches.

    Raises:
        ValueError: If no space with that name exists. Previously this case
            surfaced as a bare ``StopIteration`` with no message.
    """
    spaces = client.spaces.get_details()
    for space in spaces['resources']:
        if space['entity']["name"] == space_name:
            return space['metadata']['guid']
    raise ValueError("No deployment space named {!r} was found".format(space_name))

In [17]:
# Resolve the staging space by name instead of pasting its GUID; `space_uid` is
# reused by every space-scoped cell below.
space_uid = guid_from_space_name(client,"DHL_Staging")
space_uid

'3b41a3c1-8671-4154-ac1e-58164a6550ff'

### Set default Space

In [18]:
# Select the staging space on the client; the data-asset calls in the following cells
# presumably operate on whichever space/project is currently selected -- TODO confirm.
client.set.default_space(space_uid)

'SUCCESS'

### Promote Data Assets to Deployment space

In [19]:
# Upload the evaluation data and the two helper scripts as data assets.
# Note: the local utils.py is uploaded under the asset name "preprocess.py", which is
# the module name the deployment function downloads and imports.
# NOTE(review): the values returned by create() are discarded, while the deployment
# parameters cell uses hard-coded asset ids; those ids presumably go stale if this
# cell creates fresh assets on a re-run -- confirm.
client.data_assets.create("eval_mdd.tsv","/project_data/data_asset/eval_mdd.tsv")
client.data_assets.create("preprocess.py","/home/wsuser/work/project_git_repo/dhl_ibm_cp4d/assets/jupyterlab/utils.py")
client.data_assets.create("config.py","/home/wsuser/work/project_git_repo/dhl_ibm_cp4d/assets/jupyterlab/config.py")

### Promote Latest Model to Deployment space

In [20]:
#latest_model_file_name
#client.data_assets.create(os.path.basename(latest_model_file_name),latest_model_file_name)

In [21]:
# Re-select the deployment space (it was already selected above) so this cell also
# works when run on its own, then list the assets. The ASSET_ID column of this listing
# is the source of the ids used in the deployment parameters cell.
client.set.default_space(space_uid)
client.data_assets.list()

-------------  ----------  -------  ------------------------------------
NAME           ASSET_TYPE  SIZE     ASSET_ID
eval_mdd.tsv   data_asset  324500   4eab2759-b47a-4d3c-aa6f-fbeaac9798cd
MDD_v1.model   data_asset  4161337  a3f6e103-26c7-47a1-b4b8-61752680b188
MDD_v2.model   data_asset  102801   702aea7b-ae8b-4dd1-b925-8f43aba1a760
preprocess.py  data_asset  6977     2113e3a3-897f-4884-8b71-03fa6a054932
config.py      data_asset  11043    20eb4969-ee06-46fd-84c1-863e14752cec
-------------  ----------  -------  ------------------------------------


### Initialize the deployment function parameters

In [22]:
# Asset ids copied by hand from the `client.data_assets.list()` output above.
# NOTE(review): they identify specific uploads and have to be updated whenever the
# assets are re-created.
MODEL_ASSET_ID = "702aea7b-ae8b-4dd1-b925-8f43aba1a760"           # MDD_v2.model
EVAL_DATA_ASSET_ID = "4eab2759-b47a-4d3c-aa6f-fbeaac9798cd"       # eval_mdd.tsv
PREPROCESS_ASSET_ID = "2113e3a3-897f-4884-8b71-03fa6a054932"      # preprocess.py
CONFIG_ASSET_ID = "20eb4969-ee06-46fd-84c1-863e14752cec"          # config.py

# Model asset: both its uid and its href are extracted.
model_details = client.data_assets.get_details(MODEL_ASSET_ID)
model_id = client.data_assets.get_uid(model_details)
model_href = client.data_assets.get_href(model_details)

# Evaluation data asset.
data_details = client.data_assets.get_details(EVAL_DATA_ASSET_ID)
data_id = client.data_assets.get_uid(data_details)

# Preprocessing script asset.
script_details = client.data_assets.get_details(PREPROCESS_ASSET_ID)
script_id = client.data_assets.get_uid(script_details)

# Config script asset.
config_script_details = client.data_assets.get_details(CONFIG_ASSET_ID)
config_script_id = client.data_assets.get_uid(config_script_details)

# Default argument of mdd_deployment_function: everything the deployed closure needs
# to reconnect to WML and download its assets.
params = {
    "wml_credentials": wml_credentials,
    "project_uid": project_uid,
    "space_uid": space_uid,
    "mdd_model_id": model_id,
    "data_details": data_id,
    "preprocess_scripts": script_id,
    "config_scripts": config_script_id,
}


### Deployable MDD Python Closure Function

In [23]:
def mdd_deployment_function(params=params):
    """Create the MDD scoring function used for the WML function deployment.

    All set-up happens inside this closure: a WML client is created, the
    preprocessing script, the config script and the pickled model are
    downloaded into the working directory, and the nested ``score`` callable
    is returned.

    NOTE(review): imports and helpers are kept nested on purpose so that the
    function stays self-contained when it is stored with
    ``client.repository.store_function`` -- confirm before hoisting anything
    to module level.

    Args:
        params: Mapping with the keys ``wml_credentials``, ``space_uid``,
            ``preprocess_scripts``, ``config_scripts`` and ``mdd_model_id``.
            Defaults to the notebook-level ``params`` dict. The mapping is
            not modified.

    Returns:
        ``score(payload)``; see its docstring for the payload contract.
    """
    import sys
    from watson_machine_learning_client import WatsonMachineLearningAPIClient

    # Work on a copy so the shared default `params` (and the notebook's
    # credentials dict it points to) is not mutated as a side effect.
    wml_credentials = dict(params['wml_credentials'])
    wml_credentials['instance_id'] = 'openshift'

    # Make user-installed packages and the downloaded scripts importable.
    sys.path.insert(0,'/home/wmlfuser/.local/lib/python3.6/site-packages/')
    sys.path.insert(0, './')

    # Setup Client
    client = WatsonMachineLearningAPIClient(wml_credentials)
    client.set.default_space(params['space_uid'])

    # Download the required scripts and the model into the working directory.
    client.data_assets.download(params['preprocess_scripts'],"preprocess.py")
    client.data_assets.download(params['config_scripts'],"config.py")
    client.data_assets.download(params['mdd_model_id'],"mdd.model")

    # Holds the unpickled model after the first successful load so that it is
    # not re-read from disk on every scoring request.
    model_cache = {}

    def payload_process(data):
        """Turn the decoded payload frame into the model's input features.

        Zeros in the revenue/shipments/wps columns are treated as missing,
        ``weight`` is recomputed as ``wps * shipments``, and the rows of the
        first ``organization_id`` are handed to ``preprocess.preprocess``.
        """
        import preprocess
        import config
        import numpy as np

        # Any incoming 'weight' column is discarded and recomputed below.
        data = data.drop(['weight'], axis=1, errors='ignore')

        # ----- Some initial preprocessing calculations -----
        # Assign the result back instead of calling an inplace method on
        # `data.<column>`: the chained inplace form warns on recent pandas and
        # leaves the frame unchanged under copy-on-write.
        for column in ('new_potential_revenue', 'published_revenue', 'shipments', 'wps'):
            data[column] = data[column].replace(0, np.nan)

        data.insert(11,'weight',data['wps']*data['shipments'])
        float_columns = ['weight', 'published_revenue', 'new_potential_revenue']
        data[float_columns] = data[float_columns].astype(float)

        # NOTE(review): these columns are renamed to upper-case names, yet
        # `cat_col` below still lists 'reason_for_lead' / 'lead_source_type'
        # (and 'industry' / 'competitor'). Whether preprocess.preprocess copes
        # with that cannot be seen from here -- confirm.
        data = data.rename(columns={
                    'reason_for_lead'                :'REASONFORLEAD'
                    ,'lead_source_type'                   :'LEADSOURCE'
                    ,'industry_code'                     :'INDUSTRY'
                    ,'primary_competitor'                     :'MAINCOMP'
                    })

        label_col = "discount"
        bin_col = ['ecomm']
        cat_col = ['industry','product_cluster','loyalty_code','opportunity_type','reason_for_lead','lead_source_type','competitor']

        num_col = [
        'log_published_revenue', # log(published_revenue)
        'log_published_revenue_sq', # (log(published_revenue))^2
        'log_potential_revenue', # log(new_potential_revenue)
        'log_offer_published_revenue', # log(offer_published_revenue)
        'log_shipments', # log(shipments)
        'log_wps', # log(wps)
        'log_weight', # log(shipments x wps)
        'PROD.MIX:DDEXPORT',
        'PROD.MIX:DDIMPORT',
        'PROD.MIX:TD3RD',
        'PROD.MIX:DOMESTIC',
        'PROD.MIX:TDEXPORT',
        'PROD.MIX:TDIMPORT']

        # Only the rows of the first organization in the payload are kept.
        # The explicit copy avoids pandas' SettingWithCopyWarning on the
        # column assignment that follows.
        first_org = data['organization_id'].unique()[0]
        org_rows = data.loc[data['organization_id'] == first_org].copy()

        # NOTE(review): Series.replace matches whole values, so this only
        # blanks cells that are exactly ".". If the intent was to strip dots
        # inside values, `.str.replace` would be needed -- confirm against the
        # training-time preprocessing before changing it.
        org_rows['physical_channel'] = org_rows['physical_channel'].astype('str').replace(".",'')

        return preprocess.preprocess(org_rows,label_col,bin_col,num_col,cat_col)

    def score(payload):
        """Score one request.

        Args:
            payload: ``{"input_data": [{"fields": [...], "values": [[...], ...]}]}``;
                only the first ``input_data`` entry is read.

        Returns:
            ``{"predictions": [{"fields": fields + ["predictions"], "values": rows}]}``
            where each scored row has its prediction appended, or
            ``{"predictions": [{"error": repr(exc)}]}`` if anything raises.
        """
        # Imported before the try block so that a missing or broken downloaded
        # module raises instead of being reported as a prediction error.
        import preprocess
        import config
        import pandas as pd
        import pickle

        try:
            # Load the saved model once and reuse it for later requests.
            # NOTE: pickle.load executes code embedded in the file; this is
            # only acceptable because the asset is one this project uploaded.
            if 'model' not in model_cache:
                with open('mdd.model','rb') as f:
                    model_cache['model'] = pickle.load(f)
            final_mdd_model = model_cache['model']

            # Decode a DataFrame from the payload JSON.
            values = payload['input_data'][0]['values']
            fields = payload['input_data'][0]['fields']

            data = pd.DataFrame(values,columns=fields)
            # 'OPENSCALE' and 99999 are treated as placeholder values: blanked
            # and zeroed here (zeros in the revenue/shipments/wps columns then
            # become NaN in payload_process).
            data = data.replace('OPENSCALE','').replace(99999,0)

            categorical_columns = ['industry_code','product_cluster','reason_for_lead','lead_source_type','primary_competitor','contactrole','physical_channel']
            feature_columns = ['offer_id','organization_id','recommended_revenue','recommended_pid','recommended_pid_name','physical_channel','published_revenue','new_potential_revenue','shipments','wps', 'product_cluster', 'industry_code','contactrole','reason_for_lead', 'lead_source_type','primary_competitor']
            non_numerical = ['recommended_pid_name','offer_id','organization_id'] + categorical_columns
            numerical = [each for each in feature_columns if each not in non_numerical]

            # Coerce dtypes: 'shipments' is the only numeric feature cast to
            # int, every other numeric feature becomes float64.
            for column in numerical:
                data[column] = data[column].astype('int' if column == 'shipments' else 'float64')
            for column in non_numerical:
                data[column] = data[column].astype('str')

            # Rows echoed back in the response, taken after dtype coercion.
            values = data.values.tolist()

            # Preprocess the raw data before scoring.
            features = payload_process(data)
            predictions = list(final_mdd_model.predict(features))

            # NOTE(review): predictions are appended to `values` by position,
            # but payload_process keeps only the first organization's rows.
            # For a payload with several organizations the rows after the
            # first len(predictions) carry no prediction, and the pairing
            # assumes the first organization's rows come first -- confirm
            # callers always send a single organization.
            for row_idx, prediction in enumerate(predictions):
                values[row_idx].append(prediction)

            return {'predictions':[{'fields':fields+['predictions'],'values': values}]}

        except Exception as e:
            # Deliberate best-effort: report the failure in the response body
            # instead of failing the scoring request.
            return {"predictions": [{"error" : repr(e)}]}

    return score




### Existing Runtime

In [24]:
# List the repository contents; the (truncated) output below includes the runtimes
# that were available when this cell was run.
client.repository.list()

------------------------------------  ------------------------------  ------------------------  ---------  ---------------
GUID                                  NAME                            CREATED                   FRAMEWORK  TYPE
caffe_frcnn                           caffe_frcnn                     2020-04-11T07:24:58.096Z  -          Python runtime
caffe2_0.8                            caffe2_0.8                      2020-04-11T07:24:58.052Z  -          Python runtime
theano_1.0                            theano_1.0                      2020-04-11T07:24:58.000Z  -          Python runtime
do_12.9                               do_12.9                         2020-04-11T07:24:59.662Z  -          do runtime
hybrid_0.2                            hybrid_0.2                      2020-04-11T07:24:59.464Z  -          hybrid runtime
hybrid_0.1                            hybrid_0.1                      2020-04-11T07:24:59.446Z  -          hybrid runtime
torch_lua52                          

### Store the Function in project 

In [25]:
# Function Metadata.
client.set.default_project(project_id=project_uid)

meta_props = {
    client.repository.FunctionMetaNames.NAME: "MDD_DHL_Function_V2",
    client.repository.FunctionMetaNames.RUNTIME_UID: "ai-function_0.1-py3.6",

}

### Create the function artifact for Project

In [26]:
# Store the closure using the project-scoped `meta_props` from the previous cell.
# `function_artifact` and `function_uid` are rebound later when the function is
# stored again in the deployment space.
function_artifact = client.repository.store_function(meta_props=meta_props, function=mdd_deployment_function)
function_uid = client.repository.get_function_uid(function_artifact)
print("Function UID = " + function_uid)

### List out Functions in Project

In [27]:
# The client is still set to the project here, so this should show the function
# stored in the previous cell.
client.repository.list_functions()

### Test the Deployment Function Locally

In [28]:
#pro_data = pd.read_csv('/project_data/data_asset/eval_mdd.tsv', sep='\t', na_values='?')
#Encode
# Build a scoring payload from the first five rows of the evaluation set, in the same
# {"input_data": [{"values": ..., "fields": ...}]} shape the deployed function receives.
eval_sample = pd.read_csv('/project_data/data_asset/eval_mdd.tsv', sep='\t', na_values='?').head(5)
fields = eval_sample.columns.tolist()
values = eval_sample.values.tolist()
request_data = {"values": values, "fields": fields}
payload_data = {"input_data" : [request_data]}

# Instantiate the closure locally (this downloads the assets) and score the payload.
score_locally = mdd_deployment_function()
results = score_locally(payload_data)
results

### Store the Function in deployment space

In [29]:
# Switch the client back from the project (selected when the project copy was stored)
# to the deployment space.
client.set.default_space(space_uid)

'SUCCESS'

### Function Metadata.

In [30]:
# Rebinds `meta_props` from the project-scoped cell: a different function name, plus
# the target deployment space.
meta_props = {
    client.repository.FunctionMetaNames.NAME: "MDD_DHL_Deployment_Function_V2",
    client.repository.FunctionMetaNames.RUNTIME_UID: "ai-function_0.1-py3.6",
    client.repository.FunctionMetaNames.SPACE_UID: space_uid
}

### Create the Function artifact.

In [31]:
# Store the same closure again with the space-scoped metadata. From here on
# `function_uid` refers to this copy, which is the one deployed below.
function_artifact = client.repository.store_function(meta_props=meta_props, function=mdd_deployment_function)
function_uid = client.repository.get_function_uid(function_artifact)
print("Function UID = " + function_uid)

### Details about the function.

In [32]:
# Fetch and pretty-print the stored function's details.
function_details = client.repository.get_details(function_uid)
# pprint is imported here, at first use, rather than in the imports cell.
from pprint import pprint
pprint(function_details)

### Display a list of all the functions.


In [None]:
# The client is set to the deployment space at this point, so this should include the
# function stored there above.
client.repository.list_functions()

### Deploy the MDD Function

In [None]:
# Deployment metadata.
deploy_meta = {
    client.deployments.ConfigurationMetaNames.NAME: "MDD_DHL_Deployment_Function_V2",
    client.deployments.ConfigurationMetaNames.DESCRIPTION: "mdd_deployment_function-deploy",
    client.deployments.ConfigurationMetaNames.ONLINE: {}
}

### Create the deployment.

In [None]:
# Deploy the space-scoped function (`function_uid` was last assigned when the function
# was stored in the deployment space).
deployment_details = client.deployments.create(function_uid, meta_props=deploy_meta)
# Deployment UID, needed by the scoring call below.
deployment_uid = client.deployments.get_uid(deployment_details)
print('Deployment uid = {}'.format(deployment_uid))

### Prepare scoring payload.

In [None]:
# Two evaluation records, wrapped by the helper into the {"input_data": [...]} shape.
payload_data = prepare_scoring_payload(path='/project_data/data_asset/eval_mdd.tsv',no_of_records=2)

# Unpack the single input entry and re-wrap it under the client's scoring meta name.
first_input = payload_data['input_data'][0]
fields = first_input['fields']
values = first_input['values']

scoring_entry = {'fields': fields, 'values': values}
job_payload = {client.deployments.ScoringMetaNames.INPUT_DATA: [scoring_entry]}

### MDD Model Scoring

In [None]:
# Send the payload to the online deployment. The response is produced by the deployed
# `score` closure, so failures inside it come back under "predictions" as an "error"
# entry rather than raising here.
job_details = client.deployments.score(deployment_uid, job_payload)
job_details