# In [None]:
# publish R models in Azure http://blog.revolutionanalytics.com/2015/09/publishing-r-models-as-a-service-with-azure-ml.html
# deploy predictive model in Azure https://blogs.technet.microsoft.com/machinelearning/2015/09/25/build-deploy-predictive-web-apps-using-rstudio-and-azure-ml/
# Detailed code on predictive model: https://github.com/Azure/Azure-MachineLearning-DataScience/tree/master/Misc/KDDCup2016/Code/MRS


# how to install AzureML https://github.com/RevolutionAnalytics/AzureML
# Azure ML documentation https://cran.r-project.org/web/packages/AzureML/AzureML.pdf
# Install zip http://stackoverflow.com/questions/35178688/publishing-azureml-webservice-from-r-requires-external-zip-utility
# zip error code https://github.com/RevolutionAnalytics/AzureML/blob/master/R/internal.R

# Set compute context https://github.com/Microsoft/RServer-for-HDInsight-example-CriteoDataSet/blob/master/RServerCode/SetComputeContext.R

################################################
# Set compute context
################################################
# Platform detection drives both the storage and the compute choice below.
# Sys.info() can return NULL where it is not implemented, and the subsetted
# value carries a name, so the comparison is unnamed and wrapped in isTRUE()
# to always give a single TRUE/FALSE that is safe to use in if().
isLinux <- isTRUE(unname(Sys.info()["sysname"]) == "Linux")

useHDFS <- isLinux
useRxSpark <- isLinux

# Host-specific override: on node "ed00-marinc" point the HDFS host at the
# wasb blob container (original note: "to access our data from marinch103").
if (isTRUE(unname(Sys.info()["nodename"]) == "ed00-marinc")) {
  rxOptions(hdfsHost = "wasb://hdfs@hdic1.blob.core.windows.net")
}

if (useHDFS) {

  ################################################
  # Use Hadoop-compatible Distributed File System
  ################################################

  rxOptions(fileSystem = RxHdfsFileSystem())

  dataDir <- "/HdiSamples/HdiSamples/FlightDelay"

  ################################################

  # fullDataDir is dataDir prefixed with the HDFS host, unless the host option
  # is still the literal "default", in which case no prefix is added.
  if (rxOptions()$hdfsHost == "default") {
    fullDataDir <- dataDir
  } else {
    fullDataDir <- paste0(rxOptions()$hdfsHost, dataDir)
  }
} else {

  ################################################
  # Use Native, Local File System
  ################################################

  rxOptions(fileSystem = RxNativeFileSystem())

  dataDir <- file.path(getwd(), "delayDataLarge")

  # Define fullDataDir on the local path too, so code that refers to it works
  # on both branches; locally there is no host prefix to add.
  fullDataDir <- dataDir

  ################################################
}

################################################
# Choose the compute context
#   useRxSpark TRUE  -> distributed computing using Spark
#   useRxSpark FALSE -> single-node, local sequential computing
################################################
computeContext <- if (useRxSpark) {
  RxSpark(consoleOutput = TRUE)
} else {
  RxLocalSeq()
}

# Activate the chosen context; computeContext stays available as a global so
# other code can switch back to it.
rxSetComputeContext(computeContext)


# AzureML::publishWebService needs a zip command; fall back to "zip" when the
# R_ZIPCMD environment variable is unset or empty.
if (!nzchar(Sys.getenv("R_ZIPCMD"))) {
  Sys.setenv(R_ZIPCMD = "zip")
}


# Wrapper around RevoScaleR::rxRoc that runs it under a local sequential
# compute context and then switches back to the global `computeContext`.
#
# Args:
#   ...: forwarded unchanged to RevoScaleR::rxRoc.
#
# Returns:
#   Whatever RevoScaleR::rxRoc returns.
#
# The switch back is registered with on.exit() so the session is returned to
# `computeContext` even when RevoScaleR::rxRoc signals an error; otherwise a
# failed call would leave the session in RxLocalSeq.
rxRoc <- function(...) {
  rxSetComputeContext(RxLocalSeq())
  on.exit(rxSetComputeContext(computeContext), add = TRUE)

  RevoScaleR::rxRoc(...)
}



# Deploy the tree-based model as a scalable web service with the AzureML CRAN
# package.
setwd("/home/remoteuser/Code/MRS")
source("SetComputeContext.r")

# Restore the saved rxDTree decision tree; the file provides `dTreeModel`.
load("dTreeModelSubset.RData")

# Turn the rxDTree model into an open source rpart model.
rpartModel <- as.rpart(dTreeModel)

# Scoring function that gets published as the web service.
#   newdata: data to score; passed straight to predict() as `newdata`.
#   Returns the result of predict() for `rpartModel`.
scoringFn <- function(newdata) {
  library(rpart)
  predict(rpartModel, newdata = newdata)
}

# Train and test subsets, both located under dataDir.
trainDS <- RxXdfData(file.path(dataDir, "finalDataTrainSubset"))
testDS <- RxXdfData(file.path(dataDir, "finalDataTestSubset"))

# First rows of each data set with the ArrDel15 column removed (presumably the
# prediction target -- confirm). exampleDF serves as the input schema and
# local smoke test; dataToBeScored is sent to the published service.
exampleDF <- base::subset(head(trainDS), select = -ArrDel15)
dataToBeScored <- base::subset(head(testDS), select = -ArrDel15)

# Run the scoring function locally before publishing it.
scoringFn(exampleDF)

################################################
# Publish the scoring function as a web service
################################################

library(AzureML)

# Workspace settings are read from the JSON config file in the working
# directory.
workspace <- workspace(config = "azureml-settings.json")

endpoint <- publishWebService(
  workspace,
  scoringFn,
  name = "Delay Prediction Service",
  inputSchema = exampleDF
)

################################################
# Score new data via the web service
################################################

scores <- consume(endpoint, dataToBeScored)

head(scores)