### Goal: Calculate Predictions Using Registered Model
In this notebook, we load the most recent Staging version of the registered model from the MLflow Model Registry, calculate predictions on a holdout dataset, and write the results to Delta Lake.

In [0]:
# Specifying schema because inferSchema is not working correctly
from pyspark.sql.types import StructType
from pyspark.sql.types import DoubleType
from pyspark.sql.types import StringType

# Explicit schema for the CSV input, listed in file column order.
# Each entry is (column name, Spark type class); every column is nullable.
column_types = [
    ("RACE", StringType),
    ("DIVISION", StringType),
    ("LIVARAG", StringType),
    ("VET", StringType),
    ("METHUSE", StringType),
    ("ALCFLG", StringType),
    ("HERFLG", StringType),
    ("COKEFLG", StringType),
    ("EDUC", DoubleType),
    ("EMPLOY", StringType),
    ("SERVICES", StringType),
    ("OPSYNFLG", StringType),
    ("FRSTUSE", DoubleType),
    ("PSOURCE", StringType),
    ("FREQ", DoubleType),
    ("NOPRIOR", DoubleType),
]

# StructType.add appends a field in place, so the fields end up in list order
schema = StructType()
for column_name, column_type in column_types:
    schema.add(column_name, column_type(), True)

In [0]:
# Load the holdout CSV to calculate predictions on (described as 1000 rows;
# no row limit is applied here), treating the first line as a header and
# applying the explicit schema instead of inferring one
predict_data = (
    spark.read.format("csv")
    .option("header", True)
    .schema(schema)
    .load('/FileStore/tables/substance_abuse_holdout_data.csv')
)

In [0]:
import mlflow

# Registry coordinates of the model registered as the "best model" in MLflow
model_name, stage = "substance-abuse-best-model", "Staging"

# A "models:/<name>/<stage>" URI asks the Model Registry for the latest
# model version currently in that stage
model_uri = f"models:/{model_name}/{stage}"
model = mlflow.spark.load_model(model_uri)

# Score the Spark DataFrame with the loaded model
predictions = model.transform(predict_data)

In [0]:
# Render the scored DataFrame in the notebook: the input columns plus the
# columns added by model.transform (e.g. features, rawPrediction, probability,
# prediction). NOTE(review): `display` is presumably the Databricks notebook
# builtin — confirm the runtime.
display(predictions)

RACE,DIVISION,LIVARAG,VET,METHUSE,ALCFLG,HERFLG,COKEFLG,EDUC,EMPLOY,SERVICES,OPSYNFLG,FRSTUSE,PSOURCE,FREQ,NOPRIOR,EMPLOY_Index,PSOURCE_Index,HERFLG_Index,SERVICES_Index,ALCFLG_Index,COKEFLG_Index,RACE_Index,LIVARAG_Index,OPSYNFLG_Index,VET_Index,METHUSE_Index,DIVISION_Index,VET_OHE,EMPLOY_OHE,COKEFLG_OHE,ALCFLG_OHE,RACE_OHE,PSOURCE_OHE,DIVISION_OHE,METHUSE_OHE,SERVICES_OHE,OPSYNFLG_OHE,LIVARAG_OHE,HERFLG_OHE,EDUC_Imputed,FRSTUSE_Imputed,FREQ_Imputed,features,rawPrediction,probability,prediction
White,East_South_Central,Independent_Living,No,No,Not_Reported,Not_Reported,Not_Reported,3.0,Unemployed,Ambulatory_NonIntensive,Reported,5.0,Court,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,8.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(8), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",3.0,5.0,0.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 16, 17, 19, 20, 21, 22, 23, 24, 27, 36, 41, 42), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 5.0))","Map(vectorType -> dense, length -> 2, values -> List(0.22102078446426146, -0.22102078446426146))","Map(vectorType -> dense, length -> 2, values -> List(0.6087453880918002, 0.39125461190819977))",0.0
White,Mountain,Independent_Living,No,Yes,Not_Reported,Not_Reported,Not_Reported,,Unemployed,Ambulatory_NonIntensive,Not_Reported,,Self,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",3.0,3.0,2.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 10, 17, 19, 21, 22, 23, 24, 27, 34, 35, 41, 42, 43), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0))","Map(vectorType -> dense, length -> 2, values -> List(-0.21031186839168273, 0.21031186839168273))","Map(vectorType -> dense, length -> 2, values -> List(0.39636750503935486, 0.6036324949606451))",1.0
White,Mountain,Independent_Living,No,Yes,Not_Reported,Not_Reported,Not_Reported,,Unemployed,Ambulatory_NonIntensive,Not_Reported,,Self,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",3.0,3.0,2.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 10, 17, 19, 21, 22, 23, 24, 27, 34, 35, 41, 42, 43), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0))","Map(vectorType -> dense, length -> 2, values -> List(-0.21031186839168273, 0.21031186839168273))","Map(vectorType -> dense, length -> 2, values -> List(0.39636750503935486, 0.6036324949606451))",1.0
White,Mountain,Independent_Living,No,Yes,Not_Reported,Not_Reported,Not_Reported,,Unemployed,Ambulatory_NonIntensive,Not_Reported,,Self,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",3.0,3.0,2.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 10, 17, 19, 21, 22, 23, 24, 27, 34, 35, 41, 42, 43), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0))","Map(vectorType -> dense, length -> 2, values -> List(-0.21031186839168273, 0.21031186839168273))","Map(vectorType -> dense, length -> 2, values -> List(0.39636750503935486, 0.6036324949606451))",1.0
White,Mountain,Independent_Living,No,Yes,Not_Reported,Not_Reported,Not_Reported,,Unemployed,Ambulatory_NonIntensive,Not_Reported,,Self,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",3.0,3.0,2.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 10, 17, 19, 21, 22, 23, 24, 27, 34, 35, 41, 42, 43), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0))","Map(vectorType -> dense, length -> 2, values -> List(-0.21031186839168273, 0.21031186839168273))","Map(vectorType -> dense, length -> 2, values -> List(0.39636750503935486, 0.6036324949606451))",1.0
White,Mountain,Independent_Living,No,Yes,Not_Reported,Not_Reported,Not_Reported,,Unemployed,Ambulatory_NonIntensive,Not_Reported,,Self,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",3.0,3.0,2.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 10, 17, 19, 21, 22, 23, 24, 27, 34, 35, 41, 42, 43), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0))","Map(vectorType -> dense, length -> 2, values -> List(-0.21031186839168273, 0.21031186839168273))","Map(vectorType -> dense, length -> 2, values -> List(0.39636750503935486, 0.6036324949606451))",1.0
White,Mountain,Independent_Living,No,Yes,Not_Reported,Not_Reported,Not_Reported,,Unemployed,Ambulatory_NonIntensive,Not_Reported,,Self,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",3.0,3.0,2.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 10, 17, 19, 21, 22, 23, 24, 27, 34, 35, 41, 42, 43), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0))","Map(vectorType -> dense, length -> 2, values -> List(-0.21031186839168273, 0.21031186839168273))","Map(vectorType -> dense, length -> 2, values -> List(0.39636750503935486, 0.6036324949606451))",1.0
White,Mountain,Independent_Living,No,Yes,Not_Reported,Not_Reported,Not_Reported,,Unemployed,Ambulatory_NonIntensive,Not_Reported,,Self,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",3.0,3.0,2.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 10, 17, 19, 21, 22, 23, 24, 27, 34, 35, 41, 42, 43), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 2.0))","Map(vectorType -> dense, length -> 2, values -> List(-0.21031186839168273, 0.21031186839168273))","Map(vectorType -> dense, length -> 2, values -> List(0.39636750503935486, 0.6036324949606451))",1.0
White,Mountain,Independent_Living,No,Yes,Not_Reported,Not_Reported,Not_Reported,4.0,Unemployed,Ambulatory_NonIntensive,Reported,,Self,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",4.0,3.0,2.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 10, 17, 19, 21, 22, 23, 24, 27, 35, 41, 42, 43), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 3.0, 2.0))","Map(vectorType -> dense, length -> 2, values -> List(-0.24618344942011416, 0.24618344942011416))","Map(vectorType -> dense, length -> 2, values -> List(0.3793361455005438, 0.6206638544994563))",1.0
White,Mountain,Independent_Living,No,No,Reported,Not_Reported,Not_Reported,1.0,Part_Time,Ambulatory_NonIntensive,Not_Reported,4.0,Court,,1.0,3.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,"Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 8, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 6, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 9, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))",1.0,4.0,2.0,"Map(vectorType -> sparse, length -> 44, indices -> List(0, 10, 17, 19, 20, 22, 23, 27, 34, 36, 41, 42, 43), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 2.0))","Map(vectorType -> dense, length -> 2, values -> List(0.2328953902368372, -0.2328953902368372))","Map(vectorType -> dense, length -> 2, values -> List(0.6143870079554541, 0.3856129920445459))",0.0


In [0]:
# Persist the predictions to Delta Lake; "overwrite" mode replaces any data
# already stored at the target path
(
    predictions.write
    .format("delta")
    .mode("overwrite")
    .save("/predictions/substance_abuse_predictions")
)