Copyright © 2020, SAS Institute Inc., Cary, NC, USA.  All Rights Reserved.
SPDX-License-Identifier: Apache-2.0

# Fleet Maintenance Simple: Build and Import Trained Models into SAS Model Manager

This notebook provides an example of how to build and train a Python model and then import the model into SAS Model Manager using the fleet maintenance data set. Lines of code that must be modified by the user, such as directory paths, are noted with the comment "_Changes required by user._".

_**Note:** If you download only this notebook and not the rest of the repository, you must also download the fleet_maintenance.csv file from the data folder in the examples directory. This file is used when executing this notebook example._

Here are the steps shown in this notebook:

1. Import and review data and preprocess for model training.
2. Build, train, and assess a decision tree model.
3. Serialize the model into a pickle file.
4. Write the metadata JSON files needed for importing into SAS Model Manager as well as optional files for fit statistics and ROC/Lift charts.
5. Write a score code Python file for model scoring.
6. Zip the pickle, JSON, and score code files into an archive file.
7. Import the ZIP archive file to SAS Model Manager via the Session object and relevant function call.

### Python Package Imports

In [1]:
# Dataframes for data manipulations
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
# Mathematical calculations and array handling
import numpy as np

# Data partitioning for TRAIN and TEST data sets
from sklearn.model_selection import train_test_split
# Decision tree sklearn models
from sklearn.tree import DecisionTreeClassifier
# Model assessments 
from sklearn.metrics import classification_report, confusion_matrix

# Embedded plotting
import matplotlib.pyplot as plt 
plt.rc("font", size=14)

# Pathing support
from pathlib import Path

In [2]:
# sasctl interface for importing models
import sasctl.pzmm as pzmm
from sasctl import Session
from sasctl.services import model_repository as modelRepo

### Import Data Set

In [3]:
# Load the fleet maintenance data; the file is comma-delimited, which is the read_csv default
fleetData = pd.read_csv('data/fleet_maintenance.csv')
# Show (rows, columns) as the cell output
fleetData.shape

(8307, 40)

### Preprocess Data

In [4]:
# Predictor (input) columns used to train the model
predictorColumns = ['Speed_sensor', 'Vibration', 'Engine_Load', 'Coolant_Temp', 'Intake_Pressure', 'Engine_RPM', 'Speed_OBD',
       'Intake_Air', 'Flow_Rate', 'Throttle_Pos', 'Voltage', 'Ambient', 'Accel', 'Engine_Oil_Temp', 'Speed_GPS', 
       'GPS_Longitude', 'GPS_Latitude', 'GPS_Bearing', 'GPS_Altitude', 'Turbo_Boost', 'Trip_Distance', 'Litres_Per_km',
       'Accel_Ssor_Total', 'CO2', 'Trip_Time', 'CO_emission', 'HC_emission', 'PM_emission', 'NOx_emission', 'CO2_emission',
       'Fuel_level', 'Oil_life', 'Vibration_alert', 'VibrationAlert_Total', 'Vibration_Recent', 'Turbo_alert', 
       'Emission_alert', 'Fog_control', 'Engine_control']

# Column holding the value the model predicts
targetColumn = 'Maintenance_flag'
x = fleetData[predictorColumns]
y = fleetData[targetColumn]

# Hold out 30% of the rows as the TEST partition; the fixed seed keeps the split reproducible
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=0.3, random_state=42)

# For missing values, impute the TRAIN partition's column means into BOTH partitions.
# Filling the TEST partition with its own means would leak test-set statistics into the
# evaluation and would not match how new data is scored (only training statistics exist then).
# fillna returns new frames, so re-running this cell is idempotent.
trainMeans = xTrain.mean()
xTrain = xTrain.fillna(trainMeans)
xTest = xTest.fillna(trainMeans)

### Create, Train, and Assess Model

In [5]:
# Fit a decision tree on the TRAIN partition; the seed is fixed for reproducibility and
# every leaf must contain at least 25 training samples. fit() returns the fitted estimator.
treeModel = DecisionTreeClassifier(min_samples_leaf=25, random_state=42).fit(xTrain, yTrain)

In [6]:
# Score the held-out TEST partition: predicted class labels and per-class probabilities
yTreePredict = treeModel.predict(xTest)
yTreeProba = treeModel.predict_proba(xTest)

# Compare the predictions with the true labels
treeConfusion = confusion_matrix(yTest, yTreePredict)
treeReport = classification_report(yTest, yTreePredict)
print(treeConfusion)
print(treeReport)

# Mean accuracy on the TEST partition, expressed as a percentage with two decimals
treeAccuracy = np.round(treeModel.score(xTest, yTest) * 100, 2)
print(f'Decision Tree Model Accuracy = {treeAccuracy!s}%')

[[1698  240]
 [ 203  352]]
              precision    recall  f1-score   support

           0       0.89      0.88      0.88      1938
           1       0.59      0.63      0.61       555

    accuracy                           0.82      2493
   macro avg       0.74      0.76      0.75      2493
weighted avg       0.83      0.82      0.82      2493

Decision Tree Model Accuracy = 82.23%


### Register Model in SAS Model Manager with pzmm

In [7]:
# Prefix used to name the model and its generated files
modelPrefix = 'DecisionTreeClassifier'
# Folder that collects every file to be imported into SAS Model Manager
zipFolder = Path.cwd() / 'data/FleetMaintenanceModels/DecisionTreeClassifierSimple/'
# Create the folder on a fresh checkout so the writes below do not fail
zipFolder.mkdir(parents=True, exist_ok=True)

# Serialize the trained model into zipFolder.
# Call through a PickleModel instance instead of passing IPython's `_` (the last cell
# output) as a stand-in for `self`; `_` depends on kernel state and breaks re-runs.
pzmm.PickleModel().pickleTrainedModel(treeModel, modelPrefix, zipFolder)

In [8]:
def writeJSONFiles(data, predict, target, zipFolder, yTrain, modelPrefix):
    """Write the JSON metadata files for a model into ``zipFolder``.

    Four pzmm.JSONFiles calls are made, in order: the input variable mapping,
    the output variable mapping, the model properties, and the file metadata.

    Parameters
    ----------
    data : pandas.DataFrame
        Data set containing the predictor columns.
    predict : list of str
        Names of the predictor columns in ``data``.
    target : str
        Name of the target variable.
    zipFolder : path-like
        Folder passed to every writer as ``jPath``.
    yTrain : pandas.Series
        Training target values; its distinct values become the
        EM_CLASSIFICATION levels.
    modelPrefix : str
        Model name, also passed to the file metadata writer.
    """
    jsonWriter = pzmm.JSONFiles()

    # Input variable mapping, built from the predictor columns only
    jsonWriter.writeVarJSON(data[predict], isInput=True, jPath=zipFolder)

    # Output variable mapping: one row per target level (as strings), each paired
    # with the 0.5 event threshold
    targetLevels = yTrain.astype('category').cat.categories.astype('str')
    outputVar = pd.DataFrame({
        'EM_EVENTPROBABILITY': 0.5,  # Event threshold
        'EM_CLASSIFICATION': targetLevels,
    })
    jsonWriter.writeVarJSON(outputVar, isInput=False, jPath=zipFolder)

    # Model properties
    jsonWriter.writeModelPropertiesJSON(
        modelName=modelPrefix,
        modelDesc='',
        targetVariable=target,
        modelType='',
        modelPredictors=predict,
        targetEvent=1,
        numTargetCategories=1,
        eventProbVar='EM_EVENTPROBABILITY',
        jPath=zipFolder,
        modeler='sasdemo',
    )

    # File metadata
    jsonWriter.writeFileMetadataJSON(modelPrefix, jPath=zipFolder)

# Generate the JSON metadata files for the decision tree model
writeJSONFiles(
    data=fleetData,
    predict=predictorColumns,
    target=targetColumn,
    zipFolder=zipFolder,
    yTrain=yTrain,
    modelPrefix=modelPrefix,
)

In [9]:
import getpass
def writeModelStats(xTrain, yTrain, testProba, yTest, model, target, zipFolder, conn):
    """Write fit-statistic and ROC/Lift JSON files for a model into ``zipFolder``.

    Parameters
    ----------
    xTrain : pandas.DataFrame
        Training predictors, scored here with ``model.predict_proba``.
    yTrain : pandas.Series
        Actual training target values.
    testProba : numpy.ndarray
        Predicted probabilities for the test partition; column index 1 is used.
    yTest : pandas.Series
        Actual test target values.
    model : estimator
        Trained model exposing ``predict_proba``.
    target : str
        Target variable name, passed to ``generateROCLiftStat``.
    zipFolder : path-like
        Folder passed to both writers as ``jPath``.
    conn : object
        Connection passed through to ``generateROCLiftStat``
        (presumably a SWAT/CAS connection; confirm against sasctl).
    """
    def pairActualWithProba(actual, proba):
        # Actual values (index reset to 0..n-1) side by side with column 1 of the
        # probability array (presumably the event-class probability; confirm)
        return pd.concat([actual.reset_index(drop=True), pd.Series(data=proba[:, 1])], axis=1)

    statWriter = pzmm.JSONFiles()

    # Actual/predicted pairs for each partition; TRAIN predictions are computed here
    trainData = pairActualWithProba(yTrain, model.predict_proba(xTrain))
    testData = pairActualWithProba(yTest, testProba)

    # Calculate the model statistics and write them to JSON files
    statWriter.calculateFitStat(trainData=trainData, testData=testData, jPath=zipFolder)
    statWriter.generateROCLiftStat(target, 1, conn, trainData=trainData, testData=testData, jPath=zipFolder)
    
# Prompt for credentials at run time so they are never stored in the notebook
username = input('Username: ')
password = getpass.getpass('Password: ')
# Changes required by user. Host name of the SAS Viya server to connect to.
host = 'summer.edmt.sashq-d.openstack.sas.com'
# NOTE(review): protocol='http' sends the credentials unencrypted; switch to 'https' if the host supports it
sess = Session(host, username, password, protocol='http')

# Optional: uncomment the two lines below to also write the fit statistics and
# ROC/Lift JSON files. They are left disabled here; as_swat() is what supplies the
# `conn` argument that writeModelStats needs.
#conn = sess.as_swat()

#writeModelStats(xTrain, yTrain, yTreeProba, yTest, treeModel, targetColumn, zipFolder, conn)

In [10]:
# Changes required by user. Name of the SAS Model Manager project that receives the model.
projectName = 'EDMMMX-5441'
# Template for the call that produces predictions; the placeholders are presumably
# filled with the model object and its inputs by pzmm (confirm against sasctl).
predictTemplate = '{}.predict({})'

I = pzmm.ImportModel()
# Run the import inside the session context so it uses this connection
with sess:
    I.pzmmImportModel(zipFolder, modelPrefix, projectName, x, y, predictTemplate)

AttributeError: 'RestObj' object has no attribute 'status_code'