Copyright © 2020, SAS Institute Inc., Cary, NC, USA.  All Rights Reserved.
SPDX-License-Identifier: Apache-2.0

# Build and Import a Trained Model into SAS Open Model Manager

This notebook provides an example of how to build and train a Python model and then import the model into SAS Open Model Manager.

Here are the steps:

1. Build and train a model.
2. Serialize the model into a pickle file so that it can be deployed into SAS Open Model Manager.
3. Write JSON files associated with the trained model and write the score code .py file.
4. Zip the pickle, JSON, and score code files into an archive.
5. Import the ZIP archive file into SAS Open Model Manager via an API call.

### Step 1: Build and Train a Model

In [None]:
from pathlib import Path
import pandas as pd

import sklearn.tree as tree
from sklearn.model_selection import train_test_split

In [None]:
# Folder the input CSV files are read from, relative to the current working directory.
dataFolder = Path.cwd().joinpath('Data')
# Folder handed to the pzmm calls in later cells as the location of the model files.
zipFolder = Path.cwd().joinpath('Model')
# Name stem shared by the pzmm calls and the ZIP archive file name.
modelPrefix = 'hmeqClassTree'

In [None]:
# Target column.
yName = 'BAD'
# Predictors that are one-hot encoded before fitting.
catName = ['JOB', 'REASON']
# Predictors that are passed to the model as-is.
intName = ['CLAGE', 'CLNO', 'DEBTINC', 'DELINQ', 'DEROG', 'NINQ', 'YOJ']

# Read only the target and predictor columns from the comma-separated HMEQ file.
inputData = pd.read_csv(
    Path(dataFolder, 'hmeq.csv'),
    usecols=[yName, *catName, *intName],
)

In [None]:
# Order the columns target-first and drop every row that has a missing value.
useColumn = [yName, *catName, *intName]
inputData = inputData.loc[:, useColumn].dropna()

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
# NOTE: the frame passed as X still contains the target column, so xTrain and
# xTest include it; later cells select the predictor columns explicitly.
xTrain, xTest, yTrain, yTest = train_test_split(
    inputData,
    inputData[yName],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Entropy-based decision tree, limited in depth and node size, with a fixed seed.
model = tree.DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_split=20,
    min_samples_leaf=10,
    random_state=42,
)
# Echo the estimator so the configured hyperparameters appear in the cell output.
print(model)

In [None]:
# Design matrix: one-hot encoded categorical predictors followed by the remaining predictors.
x = pd.get_dummies(xTrain[catName].astype('category')).join(xTrain[intName])
# Target as a categorical series; its categories are read again in the next cell.
y = yTrain.astype('category')
# Fit the tree on the training partition.
trainedModel = model.fit(x, y)

In [None]:
# Category labels of the training target; also used below for the target event
# and the number of target categories.
yCategory = y.cat.categories
# Template frame describing the two score-output variables. It is passed to
# writeVarJSON with isInput=False in Step 3.
outputVar = pd.DataFrame(columns=['EM_EVENTPROBABILITY', 'EM_CLASSIFICATION'])
# One row per target category, with the label stored as a string.
outputVar['EM_CLASSIFICATION'] = yCategory.astype('str')
# NOTE(review): 0.5 looks like a placeholder that only gives the column a
# numeric value - confirm how pzmm uses it.
outputVar['EM_EVENTPROBABILITY'] = 0.5

### Step 2: Serialize a Model Into a Pickle File

In [None]:
import pzmm

In [None]:
# Serialize the fitted tree through pzmm, passing the model prefix and the model folder.
# NOTE(review): presumably the pickle is written into zipFolder under a name
# derived from modelPrefix - confirm against the pzmm documentation.
pzmm.PickleModel.pickleTrainedModel(trainedModel, modelPrefix, zipFolder)

### Step 3: Write JSON Model Files

In [None]:
# Display name recorded in the model properties.
modelName = 'Home Equity Loan Classification Tree'

JSONFiles = pzmm.JSONFiles()

# Describe the input variables (predictor columns only) ...
JSONFiles.writeVarJSON(
    inputData[catName+intName],
    isInput=True,
    jPath=zipFolder,
)
# ... and the output variables, using the template frame built in Step 1.
JSONFiles.writeVarJSON(
    outputVar,
    isInput=False,
    jPath=zipFolder,
)

# Model properties: the second target category is declared as the event.
JSONFiles.writeModelPropertiesJSON(
    modelName=modelName,
    modelDesc='',
    targetVariable=yName,
    modelType='tree',
    modelPredictors=(catName + intName),
    targetEvent=yCategory[1].astype('str'),
    numTargetCategories=len(yCategory),
    eventProbVar='EM_EVENTPROBABILITY',
    jPath=zipFolder,
)

# File metadata for the model identified by modelPrefix.
JSONFiles.writeFileMetadataJSON(modelPrefix, jPath=zipFolder)

In [None]:
# User-supplied fit statistics.
# NOTE(review): each tuple appears to be (statistic name, value, data partition),
# and the values look like sample numbers - confirm before real use.
fitStatTuples = [
    ('GAMMA', 1.65412, 'TRAIN'),
    ('NObs', 176, 'TEST'),
    ('MCLL', 0.196882, 'VALIDATE'),
]
# Fit-statistics CSV path under the Data folder of the working directory.
csvPath = Path.cwd().joinpath('Data', 'dmcas_fitstat.csv')
# A fresh writer instance, so this cell does not depend on the previous one having run.
JSONFiles = pzmm.JSONFiles()
JSONFiles.writeBaseFitStat(
    csvPath=csvPath,
    jPath=zipFolder,
    userInput=True,
    tupleList=fitStatTuples,
)

### Step 4: Zip Model and Relevant Files

In [None]:
# Bundle the model files in zipFolder into an archive through pzmm.
# Step 5 reads the result from zipFolder/<modelPrefix>.zip.
pzmm.ZipModel.zipFiles(fileDir=zipFolder, modelPrefix=modelPrefix)

### Step 5: Import Model into SAS Open Model Manager

In [None]:
# Host name of the SAS Open Model Manager server.
# NOTE(review): 'myserver.com' appears to be a placeholder - replace it with
# the real server before running.
host = 'myserver.com'
# Import client bound to that host; the next cell calls importModel on it.
ModelImport = pzmm.ModelImport(host)

In [None]:
# Location of the archive: <zipFolder>/<modelPrefix>.zip.
zPath = Path(zipFolder, modelPrefix + '.zip')
# Send the archive to the 'HMEQ' project under the model prefix.
ModelImport.importModel(
    modelPrefix,
    projectName='HMEQ',
    zPath=zPath,
)