In [1]:
# Watson ML for Developers

Waiting for a Spark session to start...
Spark Initialization Done! ApplicationId = app-20191120002639-0005
KERNEL_ID = 5b18f11f-a7b8-44f1-9047-a30024b6823c


## Import, Clean, and Analyze Data
We are going to use PixieDust to load and visualize our data.

In [2]:
import pixiedust

Pixiedust database opened successfully


In [3]:
df = pixiedust.sampleData("https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv", forcePandas=True)

Downloading 'https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv' from https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv
Downloaded 92 bytes
Creating pandas DataFrame for 'https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv'. Please wait...
Loading file using 'pandas'
Successfully created pandas DataFrame for 'https://raw.githubusercontent.com/markwatsonatx/watson-ml-for-developers/master/data/house-prices.csv'


In [None]:
display(df)
from pyspark.sql import SQLContext
#print sc
#sqlCtx = SQLContext(sc)
sqlCtx.createDataFrame(df).show()
spdf=sqlCtx.createDataFrame(df)

## Build a Machine Learning Model with Spark ML 

In [5]:
from pyspark.ml import Pipeline
from pyspark.ml.regression import LinearRegression
from pyspark.ml.feature import VectorAssembler

In [6]:
assembler = VectorAssembler(inputCols=['SquareFeet','Bedrooms'],outputCol="features")
lr = LinearRegression(labelCol='Price', featuresCol='features')
pipeline = Pipeline(stages=[assembler, lr])
model = pipeline.fit(spdf)

### Test the Model witl Spark ML

In [7]:
def get_prediction(square_feet, num_bedrooms):
    request_df = spark.createDataFrame([(square_feet, num_bedrooms)], ['SquareFeet','Bedrooms'])
    response_df = model.transform(request_df)
    return response_df

In [8]:
response = get_prediction(2400, 4)
response.show()

+----------+--------+------------+------------------+
|SquareFeet|Bedrooms|    features|        prediction|
+----------+--------+------------+------------------+
|      2400|       4|[2400.0,4.0]|137499.99999999968|
+----------+--------+------------+------------------+



### Save the Model and Training Data

In [10]:
# If you rerun this cell, save model and training data with new name
model.save( "Home Prices Model Final2" )
spdf.write.save( "training-data-Final2.parquet" )

# Deploy Model to Watson ML

In [11]:
import json
import requests
import urllib3

In [12]:
!pip install watson-machine-learning-client

Collecting watson-machine-learning-client
  Using cached https://files.pythonhosted.org/packages/12/67/66db412f00d19bfdc5725078bff373787513bfb14320f2804b9db3abb53a/watson_machine_learning_client-1.0.378-py3-none-any.whl
Collecting pandas (from watson-machine-learning-client)
  Using cached https://files.pythonhosted.org/packages/52/3f/f6a428599e0d4497e1595030965b5ba455fd8ade6e977e3c819973c4b41d/pandas-0.25.3-cp36-cp36m-manylinux1_x86_64.whl
Collecting ibm-cos-sdk (from watson-machine-learning-client)
Collecting tabulate (from watson-machine-learning-client)
Collecting lomond (from watson-machine-learning-client)
  Using cached https://files.pythonhosted.org/packages/0f/b1/02eebed49c754b01b17de7705caa8c4ceecfb4f926cdafc220c863584360/lomond-0.3.3-py2.py3-none-any.whl
Collecting certifi (from watson-machine-learning-client)
  Using cached https://files.pythonhosted.org/packages/18/b0/8146a4f8dd402f60744fa380bc73ca47303cccf8b9190fd16a827281eac2/certifi-2019.9.11-py2.py3-none-any.whl
Collec

### Fill in Your Watson ML Credentials

In [13]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient
wml_credentials = { "apikey" : "5N2kiV107Xi4zaSHtBgksTYFmXKpxgeW3-ua4MOGGsPw", 
                     "url" : "https://us-south.ml.cloud.ibm.com",
                     "instance_id" : "0cb8b342-7712-4f9d-a3ba-361ae98b4ff1" 
                  }

client = WatsonMachineLearningAPIClient( wml_credentials )

In [14]:
# Service Credential from my Watson ML Service
{
  "apikey": "5N2kiV107Xi4zaSHtBgksTYFmXKpxgeW3-ua4MOGGsPw",
  "iam_apikey_description": "Auto-generated for key 4ff38372-a28f-4f1b-b5b3-0e8376b03d87",
  "iam_apikey_name": "Service credentials-1",
  "iam_role_crn": "crn:v1:bluemix:public:iam::::serviceRole:Writer",
  "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::a/9e38ead7bd5743c3bd47f0b78c580073::serviceid:ServiceId-8eeeb26f-5352-47d7-80c6-caa199ff94ed",
  "instance_id": "0cb8b342-7712-4f9d-a3ba-361ae98b4ff1",
  "url": "https://us-south.ml.cloud.ibm.com"
}


{'apikey': '5N2kiV107Xi4zaSHtBgksTYFmXKpxgeW3-ua4MOGGsPw',
 'iam_apikey_description': 'Auto-generated for key 4ff38372-a28f-4f1b-b5b3-0e8376b03d87',
 'iam_apikey_name': 'Service credentials-1',
 'iam_role_crn': 'crn:v1:bluemix:public:iam::::serviceRole:Writer',
 'iam_serviceid_crn': 'crn:v1:bluemix:public:iam-identity::a/9e38ead7bd5743c3bd47f0b78c580073::serviceid:ServiceId-8eeeb26f-5352-47d7-80c6-caa199ff94ed',
 'instance_id': '0cb8b342-7712-4f9d-a3ba-361ae98b4ff1',
 'url': 'https://us-south.ml.cloud.ibm.com'}

### Store the model in Watson ML

In [15]:
from pyspark.ml import PipelineModel
pipeline_model = PipelineModel.load( "Home Prices Model" )
pipeline = Pipeline( stages = pipeline_model.stages )
train = spark.read.load( "training-data.parquet" )

In [16]:
# Store the PipelineModel in the Watson Machine Learning repository
model_details = client.repository.store_model( pipeline_model, 'Home Prices model', training_data=train, pipeline=pipeline )

### Deploy the stored model in Watson ML service

In [17]:
# Deploy the stored model as an online web service deployment
model_id = model_details["metadata"]["guid"]
deployment_details = client.deployments.create( artifact_uid=model_id, name="Spark MLlib model deployment" )



#######################################################################################

Synchronous deployment creation for uid: 'c4181b38-622b-4949-85d3-aab017586fa6' started

#######################################################################################


INITIALIZING
DEPLOY_SUCCESS


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='bf0ad2d7-6bcc-4cdf-bd7f-2d5ca3f53dd9'
------------------------------------------------------------------------------------------------




In [18]:
#### Test the deployment
model_endpoint_url = client.deployments.get_scoring_url( deployment_details )
payload = {'fields': ['SquareFeet','Bedrooms'], 'values': [[2400, 4]]}
client.deployments.score( model_endpoint_url, payload )

{'fields': ['SquareFeet', 'Bedrooms', 'features', 'prediction'],
 'values': [[2400, 4, [2400.0, 4.0], 137499.99999999968]]}

In [19]:
#### Testing the model locally gets the same results (testing again similar as above in cell "Test the Model witl Spark ML")
test_df = spark.createDataFrame([(2400, 4)], ['SquareFeet','Bedrooms'])
response_df = model.transform(test_df).show()

+----------+--------+------------+------------------+
|SquareFeet|Bedrooms|    features|        prediction|
+----------+--------+------------+------------------+
|      2400|       4|[2400.0,4.0]|137499.99999999968|
+----------+--------+------------+------------------+



In [33]:
service_path = 'https://watson-ml-api.mybluemix.net'
username = 'nb16549n@pace.edu'
password = 'Nick9785'
instance_id = '0cb8b342-7712-4f9d-a3ba-361ae98b4ff1'
model_name = 'House Prices Model'
deployment_name = 'House Prices Deployment'

In [34]:
from repository.mlrepositoryclient import MLRepositoryClient
from repository.mlrepositoryartifact import MLRepositoryArtifact
ml_repository_client = MLRepositoryClient(service_path)
ml_repository_client.authorize(username, password)

ApiException: (404)
Reason: Unable to authenticate due to error :
AttributeError
'HTTPResponse' object has no attribute 'text'


In [24]:
model_artifact = MLRepositoryArtifact(
   model,
   training_data=df,
   name=model_name
)
saved_model = ml_repository_client.models.save(model_artifact)
model_id = saved_model.uid

ValueError: Invalid type for training_data: DataFrame

In [30]:
headers = urllib3.util.make_headers(
   basic_auth='{}:{}'.format(username, password)
)
url = '{}/v3/identity/token'.format(service_path)
response = requests.get(url, headers=headers)
ml_token = 'Bearer ' + json.loads(response.text).get('token')

TypeError: must be str, not NoneType

In [26]:
deployment_url = service_path
   + "/v3/wml_instances/" + instance_id
   + "/published_models/" + model_id
   + "/deployments/"
deployment_header = {
   'Content-Type': 'application/json',
   'Authorization': ml_token
}
deployment_payload = {
   "type": "online",
   "name": deployment_name
}
deployment_response = requests.post(
   deployment_url,
   json=deployment_payload,
   headers=deployment_header
)
scoring_url = json.loads(deployment_response.text)
   .get('entity')
   .get('scoring_url')
print scoring_url

IndentationError: unexpected indent (<ipython-input-26-9dec80d6daea>, line 2)

In [27]:
def get_prediction_from_watson_ml(square_feet, num_bedrooms):
   scoring_header = {
      'Content-Type': 'application/json',
      'Authorization': ml_token
   }
   scoring_payload = {
      'fields': ['SquareFeet','Bedrooms'],
      'values': [[square_feet, num_bedrooms]]
   }
   scoring_response = requests.post(
      scoring_url,
      json=scoring_payload,
      headers=scoring_header
   )
   return scoring_response.text

In [28]:
response = get_prediction_from_watson_ml(2400, 4)
print response

SyntaxError: Missing parentheses in call to 'print'. Did you mean print(response)? (<ipython-input-28-1e4157e30105>, line 2)

In [29]:
{
  "fields": ["SquareFeet", "Bedrooms", "features", "prediction"],
  "values": [[2400, 4, [2400.0, 4.0], 137499.99999999968]]
}

{'fields': ['SquareFeet', 'Bedrooms', 'features', 'prediction'],
 'values': [[2400, 4, [2400.0, 4.0], 137499.99999999968]]}