In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from pyspark.sql.types import IntegerType

# Session handle used by every Spark call in the cells below; getOrCreate()
# hands back the already-running session when there is one.
spark = SparkSession.builder.getOrCreate()

In [4]:
# Load the house-price CSV, with the 'header' option switched on.
# NOTE(review): the source is addressed by its fully qualified class name
# rather than a short format name -- presumably deliberate; confirm before
# simplifying.
df = (
    spark.read
    .format('org.apache.spark.sql.execution.datasources.csv.CSVFileFormat')
    .option('header', 'true')
    .load('/usr/data/house-prices.csv')
)

In [5]:
# Replace each numeric column with an integer-typed version of itself.
# 'Color' is left untouched.
df = (
    df
    .withColumn('SquareFeet', col('SquareFeet').cast(IntegerType()))
    .withColumn('Bedrooms', col('Bedrooms').cast(IntegerType()))
    .withColumn('Price', col('Price').cast(IntegerType()))
)

In [6]:
# Print the frame as a text table to eyeball the data after the casts.
df.show()

+----------+--------+-----+------+
|SquareFeet|Bedrooms|Color| Price|
+----------+--------+-----+------+
|      2100|       3|White|100000|
|      2300|       4|White|125000|
|      2500|       4|Brown|150000|
+----------+--------+-----+------+



In [7]:
from pyspark.ml import Pipeline
from pyspark.ml.regression import LinearRegression
from pyspark.ml.feature import VectorAssembler

In [8]:
# Stage 1: combine the two numeric predictors into a single "features" column.
assembler = VectorAssembler(
    inputCols=['SquareFeet', 'Bedrooms'],
    outputCol="features",
)

# Stage 2: linear regression that reads "features" and uses 'Price' as label.
lr = LinearRegression(
    featuresCol='features',
    labelCol='Price',
    maxIter=10,
    regParam=0.3,
    elasticNetParam=0.8,
)

# Chain both stages and fit them on the prepared frame in one go.
pipeline = Pipeline(stages=[assembler, lr])
model = pipeline.fit(df)

In [9]:
def get_prediction(square_feet, num_bedrooms):
    """Score a single house with the locally fitted pipeline ``model``.

    Args:
        square_feet: value placed in the 'SquareFeet' column.
        num_bedrooms: value placed in the 'Bedrooms' column.

    Returns:
        The DataFrame produced by ``model.transform`` for a one-row input
        frame built from the two arguments.
    """
    single_row = [(square_feet, num_bedrooms)]
    column_names = ['SquareFeet', 'Bedrooms']
    request_frame = spark.createDataFrame(single_row, column_names)
    return model.transform(request_frame)

In [10]:
# Smoke-test the local model with one example house (2400 sq ft, 4 bedrooms)
# and print the resulting frame.
res = get_prediction(2400, 4)
res.show()

+----------+--------+------------+------------------+
|SquareFeet|Bedrooms|    features|        prediction|
+----------+--------+------------+------------------+
|      2400|       4|[2400.0,4.0]|137499.84066275216|
+----------+--------+------------+------------------+



# Deploy to Watson ML

In [11]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

In [12]:
import os

# Names used further down when the model is stored and deployed.
model_name = 'House Prices Model 2'
deployment_name = 'House Prices Deployment 2'

# Connection settings are read from the environment so that no secret lives in
# the notebook; a missing variable raises KeyError right here.
wml_credentials = dict(
    url=os.environ['WML_SERVICE_PATH'],
    username=os.environ['WML_USERNAME'],
    password=os.environ['WML_PASSWORD'],
    instance_id=os.environ['WML_INSTANCE_ID'],
)

In [13]:
# API client built from the credentials above; every repository and deployment
# call below goes through this object.
client = WatsonMachineLearningAPIClient(wml_credentials)

In [14]:
# Intentionally disabled: uncommenting the next line calls
# client.repository.delete with a hard-coded GUID.
#client.repository.delete('94597a4d-03cf-4fdb-b71a-94dd86bca2c1')

In [15]:
# Print a table (GUID, name, creation time, framework, type) of the models
# currently in the repository.
client.repository.list_models()

------------------------------------  ------------------  ------------------------  ---------  -----
GUID                                  NAME                CREATED                   FRAMEWORK  TYPE
1d2faf4e-d042-4d8d-b2dd-ec00896577a2  House Prices Model  2018-02-13T22:43:11.048Z  mllib-2.1  model
6bcbc9e7-789e-446f-b61f-66f1e3ef06c5  Shopping History    2018-02-15T21:45:56.492Z  mllib-2.1  model
------------------------------------  ------------------  ------------------------  ---------  -----


In [16]:
# Metadata passed as meta_props when the model is stored; keys are the
# constants exposed by the repository's ModelMetaNames.
meta_names = client.repository.ModelMetaNames
model_props = {
    meta_names.AUTHOR_NAME: 'Mark Watson',
    meta_names.AUTHOR_EMAIL: 'markwats@us.ibm.com',
    meta_names.NAME: model_name,
}

In [15]:
# Store the fitted model together with its pipeline definition and metadata.
# The whole frame is passed as training data, so the stored input schema also
# lists the 'Color' column even though the assembler never reads it.
saved_model = client.repository.store_model(
    model=model,
    pipeline=pipeline,
    meta_props=model_props,
    training_data=df,
)
# Display the details returned for the stored model.
saved_model

{'entity': {'author': {'email': 'markwats@us.ibm.com', 'name': 'Mark Watson'},
  'deployments': {'count': 0,
   'url': 'https://ibm-watson-ml.mybluemix.net/v3/wml_instances/840321b3-7fb0-4d84-bc6b-33a371b9ecb6/published_models/f514e05d-bc3e-4b4a-bfd2-bc417826a1ab/deployments'},
  'evaluation_metrics_url': 'https://ibm-watson-ml.mybluemix.net/v3/wml_instances/840321b3-7fb0-4d84-bc6b-33a371b9ecb6/published_models/f514e05d-bc3e-4b4a-bfd2-bc417826a1ab/evaluation_metrics',
  'feedback_url': 'https://ibm-watson-ml.mybluemix.net/v3/wml_instances/840321b3-7fb0-4d84-bc6b-33a371b9ecb6/published_models/f514e05d-bc3e-4b4a-bfd2-bc417826a1ab/feedback',
  'input_data_schema': {'fields': [{'metadata': {},
     'name': 'SquareFeet',
     'nullable': True,
     'type': 'integer'},
    {'metadata': {}, 'name': 'Bedrooms', 'nullable': True, 'type': 'integer'},
    {'metadata': {}, 'name': 'Color', 'nullable': True, 'type': 'string'}],
   'type': 'struct'},
  'label_col': 'Price',
  'latest_version': {'cre

In [24]:
# Pull the stored model's UID out of the details returned by store_model;
# it is the handle passed to deployments.create below.
model_id = client.repository.get_model_uid(saved_model)

In [25]:
# Create a deployment of the stored model under the chosen deployment name.
deployment = client.deployments.create(model_id, deployment_name)

In [27]:
# Scoring endpoint of the new deployment; read as a global by
# get_prediction_from_watson_ml.
scoring_url = client.deployments.get_scoring_url(deployment)

print(scoring_url)

https://ibm-watson-ml.mybluemix.net/v3/wml_instances/840321b3-7fb0-4d84-bc6b-33a371b9ecb6/published_models/f514e05d-bc3e-4b4a-bfd2-bc417826a1ab/deployments/587a3f56-b5d7-407f-aa57-8976a8944e4a/online


In [33]:
def get_prediction_from_watson_ml(square_feet, num_bedrooms):
    """Score a single house against the deployed model.

    Builds a one-row payload with the 'SquareFeet' and 'Bedrooms' fields,
    prints it, and submits it to ``scoring_url`` through
    ``client.deployments.score``.

    Args:
        square_feet: value sent for the 'SquareFeet' field.
        num_bedrooms: value sent for the 'Bedrooms' field.

    Returns:
        Whatever ``client.deployments.score`` returns for the payload.
    """
    request_body = {
        'fields': ['SquareFeet', 'Bedrooms'],
        'values': [[square_feet, num_bedrooms]],
    }
    # Echo the outgoing payload so the request is visible in the cell output.
    print(request_body)
    return client.deployments.score(scoring_url, request_body)

In [34]:
# Score the same example house as the local check (2400 sq ft, 4 bedrooms)
# through the deployed endpoint.
response = get_prediction_from_watson_ml(2400, 4)

{'fields': ['SquareFeet', 'Bedrooms'], 'values': [[2400, 4]]}


In [36]:
# Show the scoring response so its prediction can be compared with the local
# result above.
print(response)

{'fields': ['SquareFeet', 'Bedrooms', 'features', 'prediction'], 'values': [[2400, 4, [2400.0, 4.0], 137499.84066275216]]}
