In [None]:
# Watson ML for Developers

## Import, Clean, and Analyze Data
We are going to use PixieDust to load and visualize our data.

In [1]:
import pixiedust

Waiting for a Spark session to start...
Spark Initialization Done! ApplicationId = app-20191119223446-0004
KERNEL_ID = 5b18f11f-a7b8-44f1-9047-a30024b6823c
Pixiedust database opened successfully


In [2]:
df = pixiedust.sampleData("https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv", forcePandas=True)

Downloading 'https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv' from https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv
Downloaded 92 bytes
Creating pandas DataFrame for 'https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv'. Please wait...
Loading file using 'pandas'
Successfully created pandas DataFrame for 'https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv'


In [None]:
display(df)
from pyspark.sql import SQLContext
#print sc
#sqlCtx = SQLContext(sc)
sqlCtx.createDataFrame(df).show()
spdf=sqlCtx.createDataFrame(df)

## Build a Machine Learning Model with Spark ML 

In [4]:
from pyspark.ml import Pipeline
from pyspark.ml.regression import LinearRegression
from pyspark.ml.feature import VectorAssembler

In [5]:
assembler = VectorAssembler(inputCols=['SquareFeet','Bedrooms'],outputCol="features")
lr = LinearRegression(labelCol='Price', featuresCol='features')
pipeline = Pipeline(stages=[assembler, lr])
model = pipeline.fit(spdf)

### Test the Model witl Spark ML

In [6]:
def get_prediction(square_feet, num_bedrooms):
    request_df = spark.createDataFrame([(square_feet, num_bedrooms)], ['SquareFeet','Bedrooms'])
    response_df = model.transform(request_df)
    return response_df

In [7]:
response = get_prediction(2400, 4)
response.show()

+----------+--------+------------+------------------+
|SquareFeet|Bedrooms|    features|        prediction|
+----------+--------+------------+------------------+
|      2400|       4|[2400.0,4.0]|137499.99999999968|
+----------+--------+------------+------------------+



### Save the Model and Training Data

In [9]:
# If you rerun this cell, save model and training data with new name
model.save( "Home Prices Model Final1" )
spdf.write.save( "training-data-Final1.parquet" )

# Deploy Model to Watson ML

In [10]:
import json
import requests
import urllib3

In [11]:
!pip install watson-machine-learning-client

Collecting watson-machine-learning-client
  Using cached https://files.pythonhosted.org/packages/12/67/66db412f00d19bfdc5725078bff373787513bfb14320f2804b9db3abb53a/watson_machine_learning_client-1.0.378-py3-none-any.whl
Collecting tabulate (from watson-machine-learning-client)
Collecting certifi (from watson-machine-learning-client)
  Using cached https://files.pythonhosted.org/packages/18/b0/8146a4f8dd402f60744fa380bc73ca47303cccf8b9190fd16a827281eac2/certifi-2019.9.11-py2.py3-none-any.whl
Collecting urllib3 (from watson-machine-learning-client)
  Using cached https://files.pythonhosted.org/packages/b4/40/a9837291310ee1ccc242ceb6ebfd9eb21539649f193a7c8c86ba15b98539/urllib3-1.25.7-py2.py3-none-any.whl
Collecting pandas (from watson-machine-learning-client)
  Using cached https://files.pythonhosted.org/packages/52/3f/f6a428599e0d4497e1595030965b5ba455fd8ade6e977e3c819973c4b41d/pandas-0.25.3-cp36-cp36m-manylinux1_x86_64.whl
Collecting requests (from watson-machine-learning-client)
  Usin

### Fill in Your Watson ML Credentials

In [12]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient
wml_credentials = { "apikey" : "5N2kiV107Xi4zaSHtBgksTYFmXKpxgeW3-ua4MOGGsPw", 
                     "url" : "https://us-south.ml.cloud.ibm.com",
                     "instance_id" : "0cb8b342-7712-4f9d-a3ba-361ae98b4ff1" 
                  }

client = WatsonMachineLearningAPIClient( wml_credentials )

In [13]:
# Service Credential from my Watson ML Service
{
  "apikey": "5N2kiV107Xi4zaSHtBgksTYFmXKpxgeW3-ua4MOGGsPw",
  "iam_apikey_description": "Auto-generated for key 4ff38372-a28f-4f1b-b5b3-0e8376b03d87",
  "iam_apikey_name": "Service credentials-1",
  "iam_role_crn": "crn:v1:bluemix:public:iam::::serviceRole:Writer",
  "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::a/9e38ead7bd5743c3bd47f0b78c580073::serviceid:ServiceId-8eeeb26f-5352-47d7-80c6-caa199ff94ed",
  "instance_id": "0cb8b342-7712-4f9d-a3ba-361ae98b4ff1",
  "url": "https://us-south.ml.cloud.ibm.com"
}


{'apikey': '5N2kiV107Xi4zaSHtBgksTYFmXKpxgeW3-ua4MOGGsPw',
 'iam_apikey_description': 'Auto-generated for key 4ff38372-a28f-4f1b-b5b3-0e8376b03d87',
 'iam_apikey_name': 'Service credentials-1',
 'iam_role_crn': 'crn:v1:bluemix:public:iam::::serviceRole:Writer',
 'iam_serviceid_crn': 'crn:v1:bluemix:public:iam-identity::a/9e38ead7bd5743c3bd47f0b78c580073::serviceid:ServiceId-8eeeb26f-5352-47d7-80c6-caa199ff94ed',
 'instance_id': '0cb8b342-7712-4f9d-a3ba-361ae98b4ff1',
 'url': 'https://us-south.ml.cloud.ibm.com'}

### Store the model in Watson ML

In [14]:
from pyspark.ml import PipelineModel
pipeline_model = PipelineModel.load( "Home Prices Model" )
pipeline = Pipeline( stages = pipeline_model.stages )
train = spark.read.load( "training-data.parquet" )

In [15]:
# Store the PipelineModel in the Watson Machine Learning repository
model_details = client.repository.store_model( pipeline_model, 'Home Prices model', training_data=train, pipeline=pipeline )

### Deploy the stored model in Watson ML service

In [16]:
# Deploy the stored model as an online web service deployment
model_id = model_details["metadata"]["guid"]
deployment_details = client.deployments.create( artifact_uid=model_id, name="Spark MLlib model deployment" )



#######################################################################################

Synchronous deployment creation for uid: '1ff28b6e-8e4a-4e9c-891a-0b824b76154b' started

#######################################################################################


INITIALIZING
DEPLOY_SUCCESS


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='ab08384b-64ba-41eb-ac16-37ded9968724'
------------------------------------------------------------------------------------------------




In [17]:
#### Test the deployment
model_endpoint_url = client.deployments.get_scoring_url( deployment_details )
payload = {'fields': ['SquareFeet','Bedrooms'], 'values': [[2400, 4]]}
client.deployments.score( model_endpoint_url, payload )

{'fields': ['SquareFeet', 'Bedrooms', 'features', 'prediction'],
 'values': [[2400, 4, [2400.0, 4.0], 137499.99999999968]]}

In [18]:
#### Testing the model locally gets the same results (testing again similar as above in cell "Test the Model witl Spark ML")
test_df = spark.createDataFrame([(2400, 4)], ['SquareFeet','Bedrooms'])
response_df = model.transform(test_df).show()

+----------+--------+------------+------------------+
|SquareFeet|Bedrooms|    features|        prediction|
+----------+--------+------------+------------------+
|      2400|       4|[2400.0,4.0]|137499.99999999968|
+----------+--------+------------+------------------+

