In [1]:
import json, shutil, os
import onnxruntime
import pandas as pd
import numpy as np
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
import random, os
import numpy as np
from pyspark.sql import Row
from sklearn import neighbors
from pyspark.ml.feature import VectorAssembler
from pyspark.mllib.stat import Statistics
from pyspark.ml import PipelineModel
from pyspark.sql import SparkSession

from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.classification import LogisticRegression, RandomForestClassifier, GBTClassifier
from pyspark.ml.tuning import ParamGridBuilder, TrainValidationSplit
from pyspark.ml.feature import StringIndexer, VectorAssembler, StandardScaler
from pyspark.ml import Pipeline
from pyspark.mllib.stat import Statistics
from pyspark.ml.linalg import DenseVector
from pyspark.sql import functions as F


# One-row example frame: a single float reading for each of five sensor columns.
# NOTE(review): these SensorCH* columns differ from the five loan feature names
# used by the model cells in this notebook -- confirm this sample is still needed.
_sample_readings = {
    "SensorCH1": -.1,
    "SensorCH2": .1,
    "SensorCH3": .1,
    "SensorCH4": .1,
    "SensorCH5": 5.1,
}
input_sample = pd.DataFrame(
    data={channel: [reading] for channel, reading in _sample_readings.items()}
)

# One-element float array used as the example output.
output_sample = np.array([0.1])

In [2]:
output_sample

array([0.1])

In [3]:
import onnxruntime
import onnxmltools
import onnx

In [4]:
# Create (or reuse) the Spark session for this notebook.
# os.environ["STORAGE"] is evaluated while the builder chain is assembled, so a
# missing STORAGE variable raises KeyError before getOrCreate() is reached.
spark = (
    SparkSession.builder
    .appName("ONNX_Model")
    .config("spark.hadoop.fs.s3a.s3guard.ddb.region", "us-east-2")
    .config("spark.yarn.access.hadoopFileSystems", os.environ["STORAGE"])
    .getOrCreate()
)

Setting spark.hadoop.yarn.resourcemanager.principal to pauldefusco


In [8]:
def load_pipeline():
    """Load the saved Spark ``PipelineModel`` from ``$STORAGE/pdefusco/pipeline``.

    The storage root is read from the ``STORAGE`` environment variable on each
    call; if the variable is not set, ``KeyError`` is raised.

    Returns:
        Whatever ``PipelineModel.load`` returns for that location.
    """
    pipeline_location = os.environ["STORAGE"] + "/pdefusco/pipeline"
    return PipelineModel.load(pipeline_location)

In [9]:
# Load the saved pipeline once; a later cell calls modelPipeline.transform(...) on it.
modelPipeline = load_pipeline()

In [10]:
# Fetch a single row (LIMIT 1) from default.lc_smote_subset as a Spark DataFrame.
df = spark.sql("SELECT * FROM default.lc_smote_subset LIMIT 1")

Hive Session ID = 6fa36075-f1a1-4ced-94f5-ca0ce7febb47


In [14]:
# Narrow the Spark row to five columns, then collect it to the driver as a
# pandas frame for inspection.
df = df.select(
    'acc_now_delinq',
    'acc_open_past_24mths',
    'annual_inc',
    'avg_cur_bal',
    'funded_amnt',
)
input_sample_pd_df = df.toPandas()

                                                                                

In [22]:
input_sample_pd_df.iloc[0]

acc_now_delinq         -0.071031
acc_open_past_24mths    0.895469
annual_inc             -0.084037
avg_cur_bal            -0.192086
funded_amnt            -0.402238
Name: 0, dtype: float64

In [25]:
# Run the Spark pipeline on the sample row and print only its "prediction" column.
modelPipeline.transform(df).select("prediction").show()

[Stage 19:>                                                         (0 + 1) / 1]

+----------+
|prediction|
+----------+
|       0.0|
+----------+



                                                                                

In [16]:
# Load the exported model from disk and serialize it.
# NOTE: despite its name, `model_path` holds the serialized model bytes, not a
# filesystem path; InferenceSession accepts either form.
model_path = onnx.load("model.onnx").SerializeToString()

# Session options only take effect when they are handed to InferenceSession.
# 'session.load_model_format' is the ONNX Runtime config key that declares the
# model format ('ONNX' or 'ORT').
so = onnxruntime.SessionOptions()
so.add_session_config_entry('session.load_model_format', 'ONNX')

session = onnxruntime.InferenceSession(model_path, sess_options=so)

# Metadata for the first model output and for all model inputs; the inputs are
# used later to name the entries of the feed dict.
output = session.get_outputs()[0]
inputs = session.get_inputs()

In [34]:
# Example record: five feature values, all supplied as strings.
data = dict(
    acc_now_delinq="4",
    acc_open_past_24mths="329.08",
    annual_inc="1",
    avg_cur_bal="1",
    funded_amnt="1",
)

In [37]:
# Wrap the scalar-valued dict as a one-row pandas frame (index label 0) and show it.
# Note: this rebinds `df`, which earlier cells used for a Spark DataFrame.
df = pd.DataFrame(data, index=[0])
df

Unnamed: 0,acc_now_delinq,acc_open_past_24mths,annual_inc,avg_cur_bal,funded_amnt
0,4,329.08,1,1,1


In [38]:
# Rebuild the one-row frame from the record, label its columns, and convert
# every column from string to float in a single astype call.
df = pd.DataFrame(data, index=[0])

df.columns = ['acc_now_delinq', 'acc_open_past_24mths', 'annual_inc', 'avg_cur_bal', 'funded_amnt']

df = df.astype({column: float for column in df.columns})

In [39]:
df

Unnamed: 0,acc_now_delinq,acc_open_past_24mths,annual_inc,avg_cur_bal,funded_amnt
0,4.0,329.08,1.0,1.0,1.0


In [40]:
# Build the ONNX Runtime feed: the row is reshaped to (n_inputs, 1, 1) so each
# model input receives its own (1, 1) float32 array, paired with the inputs by
# position.
input_data = dict(
    zip(
        (model_input.name for model_input in inputs),
        df.values.reshape(len(inputs), 1, 1).astype(np.float32),
    )
)

In [41]:
input_data

{'acc_now_delinq': array([[4.]], dtype=float32),
 'acc_open_past_24mths': array([[329.08]], dtype=float32),
 'annual_inc': array([[1.]], dtype=float32),
 'avg_cur_bal': array([[1.]], dtype=float32),
 'funded_amnt': array([[1.]], dtype=float32)}

In [42]:
# Run inference; passing None as the output list requests every model output.
# Note: this rebinds `output`, which previously held session.get_outputs()[0].
output = session.run(None, input_data)

In [45]:
# Take column 0, row 0 of the outputs wrapped in a DataFrame.
# NOTE(review): presumably this is the predicted label -- confirm against the
# model's output order.
pd.DataFrame(output)[0][0]

0

In [None]:
# Load the exported model from disk and serialize it.
# NOTE: despite its name, `model_path` holds the serialized model bytes, not a
# filesystem path; InferenceSession accepts either form.
model_path = onnx.load("model.onnx").SerializeToString()

# Session options only take effect when they are handed to InferenceSession.
# 'session.load_model_format' is the ONNX Runtime config key that declares the
# model format ('ONNX' or 'ORT').
so = onnxruntime.SessionOptions()
so.add_session_config_entry('session.load_model_format', 'ONNX')

session = onnxruntime.InferenceSession(model_path, sess_options=so)

# Metadata for the first model output and for all model inputs; run() reads the
# module-level `session` and `inputs` defined here.
output = session.get_outputs()[0]
inputs = session.get_inputs()
    
def run(input_data):
    """Score a single record with the module-level ONNX Runtime ``session``.

    Args:
        input_data: Mapping of feature name to one scalar value (a number or a
            numeric string), e.g. ``{"acc_now_delinq": "4", ...}``. When all
            five expected feature names are present, values are selected by
            name, so key order does not matter and extra keys are ignored.
            Otherwise exactly five values are required and they are taken in
            the order given (the original positional behaviour).

    Returns:
        ``{"result": prediction}`` on success, where the prediction is a plain
        Python value that ``json.dumps`` can serialize, or
        ``{"error": message}`` if preparing the input or running the model
        fails.
    """
    feature_columns = ['acc_now_delinq', 'acc_open_past_24mths', 'annual_inc', 'avg_cur_bal', 'funded_amnt']

    try:
        # Use the caller's payload (not a notebook-level variable) and keep the
        # preprocessing inside the try so malformed input yields the error dict.
        features = pd.DataFrame(input_data, index=[0])
        if set(feature_columns).issubset(features.columns):
            # Select by name so a payload with reordered keys is not mis-assigned.
            features = features[feature_columns]
        else:
            # Positional fallback; raises if the payload does not have five values.
            features.columns = feature_columns
        features = features.astype(float)

        # One (1, 1) float32 array per model input, paired by position.
        tensors = features.values.reshape(len(inputs), 1, 1).astype(np.float32)
        feed = {model_input.name: tensor for model_input, tensor in zip(inputs, tensors)}

        output = session.run(None, feed)
        pred = pd.DataFrame(output)[0][0]
        # The extracted value may be a NumPy scalar, which json.dumps rejects;
        # convert it to the equivalent built-in Python type.
        if hasattr(pred, "item"):
            pred = pred.item()

        print('[INFO] Results was ' + json.dumps(pred))
        return {"result": pred}

    except Exception as e:
        return {"error": str(e)}