In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from pyspark.sql.types import IntegerType

# Reuse the active Spark session if one exists; otherwise start a new one.
spark = SparkSession.builder.getOrCreate()

In [None]:
# Load the raw house-price data. The first line of the file is used as the
# header; no schema inference is requested, so columns are read as strings.
# The CSV source is selected through the public DataFrameReader.csv API instead
# of Spark's internal CSVFileFormat class path, which is an implementation detail.
df = spark.read.csv('/usr/data/house-prices.csv', header=True)

In [None]:
# Convert the two predictors and the label to integers, one column at a time.
for numeric_column in ('SquareFeet', 'Bedrooms', 'Price'):
    df = df.withColumn(numeric_column, df[numeric_column].cast(IntegerType()))

In [None]:
# Preview the data after the integer casts (show() prints the first 20 rows by default).
df.show()

In [None]:
from pyspark.ml import Pipeline
from pyspark.ml.regression import LinearRegression
from pyspark.ml.feature import VectorAssembler

In [None]:
# Stage 1: pack the two predictor columns into a single 'features' vector column.
assembler = VectorAssembler(
    inputCols=['SquareFeet', 'Bedrooms'],
    outputCol='features',
)

# Stage 2: regularised linear regression that predicts 'Price' from 'features'.
lr = LinearRegression(
    featuresCol='features',
    labelCol='Price',
    maxIter=10,
    regParam=0.3,
    elasticNetParam=0.8,
)

# Chain both stages and fit them on the whole DataFrame (no train/test split here).
pipeline = Pipeline(stages=[assembler, lr])
model = pipeline.fit(df)

In [None]:
def get_prediction(square_feet, num_bedrooms):
    """Score a single house with the locally fitted pipeline model.

    Args:
        square_feet: Value for the 'SquareFeet' column.
        num_bedrooms: Value for the 'Bedrooms' column.

    Returns:
        The result of ``model.transform`` applied to a one-row DataFrame
        built from the two arguments.
    """
    request_rows = [(square_feet, num_bedrooms)]
    request_columns = ['SquareFeet', 'Bedrooms']
    # Build the one-row request frame and run it through the fitted pipeline.
    return model.transform(spark.createDataFrame(request_rows, request_columns))

In [None]:
# Sanity-check the locally fitted model on one example: 2400 square feet, 4 bedrooms.
res = get_prediction(2400, 4)
res.show()

# Deploy to Watson ML

In [None]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

In [None]:
import os

# Names used for the stored model and for its deployment.
model_name = 'House Prices Model 2'
deployment_name = 'House Prices Deployment 2'

# Connection settings come from environment variables so that no secret is
# written into the notebook; a missing variable raises KeyError right here.
wml_credentials = dict(
    url=os.environ['WML_SERVICE_PATH'],
    username=os.environ['WML_USERNAME'],
    password=os.environ['WML_PASSWORD'],
    instance_id=os.environ['WML_INSTANCE_ID'],
)

In [None]:
# Create the Watson Machine Learning API client from the credentials dictionary.
client = WatsonMachineLearningAPIClient(wml_credentials)

In [None]:
# Optional cleanup: uncomment to delete a previously stored artifact by its ID.
#client.repository.delete('94597a4d-03cf-4fdb-b71a-94dd86bca2c1')

In [None]:
# List the models currently stored in the repository.
client.repository.list_models()

In [None]:
# Metadata stored with the model; the keys are the client's ModelMetaNames constants.
meta_names = client.repository.ModelMetaNames
model_props = {
    meta_names.AUTHOR_NAME: 'Mark Watson',
    meta_names.AUTHOR_EMAIL: 'markwats@us.ibm.com',
    meta_names.NAME: model_name,
}

In [None]:
# Store the fitted model in the repository, passing along the pipeline that
# produced it, the metadata and the training DataFrame.
saved_model = client.repository.store_model(
    model=model,
    pipeline=pipeline,
    meta_props=model_props,
    training_data=df,
)
# Echo the returned value as the cell output.
saved_model

In [None]:
# Extract the stored model's UID from the value returned by store_model.
model_id = client.repository.get_model_uid(saved_model)

In [None]:
# Create a deployment called deployment_name for the stored model.
deployment = client.deployments.create(model_id, deployment_name)

In [None]:
# Look up the deployment's scoring URL; scoring requests are sent to it later.
scoring_url = client.deployments.get_scoring_url(deployment)

print(scoring_url)

In [None]:
def get_prediction_from_watson_ml(square_feet, num_bedrooms):
    """Score a single house through the deployed Watson ML model.

    Args:
        square_feet: Value sent for the 'SquareFeet' field.
        num_bedrooms: Value sent for the 'Bedrooms' field.

    Returns:
        Whatever ``client.deployments.score`` returns for the request.
    """
    request_body = {
        'fields': ['SquareFeet', 'Bedrooms'],
        'values': [[square_feet, num_bedrooms]],
    }
    # Echo the outgoing payload so the request is visible in the cell output.
    print(request_body)
    return client.deployments.score(scoring_url, request_body)

In [None]:
# Score one example house (2400 square feet, 4 bedrooms) through the deployed model.
response = get_prediction_from_watson_ml(2400, 4)

In [None]:
# Show the raw scoring response.
print(response)