In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import math 

In [None]:
# Show the installed pandas version (useful when behaviour differs between releases).
print(pd.__version__)

In [None]:
from azureml.core import Workspace

# Obtain the Azure ML workspace handle used by the later upload/register/deploy cells.
ws = Workspace.from_config()

# Echo the identifying details, one per line, as a quick sanity check.
print('\n'.join(str(detail) for detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id)))

In [None]:
# Running scoreboard shared by every call to metrics(); one row per evaluated model.
model_metrics = pd.DataFrame(columns=["Model", "MSE", "RMSE", "R2"], dtype=object)


def metrics(model, y, y_hat):
    """Record one model's regression scores in ``model_metrics`` and return the table.

    Parameters
    ----------
    model : label stored in the "Model" column (the caller passes a display name).
    y : true target values.
    y_hat : predicted target values.

    Returns
    -------
    The module-level ``model_metrics`` DataFrame, mutated in place. The row
    added by this call ends up with index label 0 and the labels of all
    previously recorded rows are incremented by one.
    """
    mse = mean_squared_error(y, y_hat)
    scores = {
        "Model": model,
        "MSE": mse,
        "RMSE": math.sqrt(mse),
        "R2": r2_score(y, y_hat),
    }

    # Insert under the temporary label -1, then shift every label up by one
    # so that the labels stay non-negative.
    model_metrics.loc[-1] = scores
    model_metrics.index += 1
    return model_metrics

#metrics("PCA Forest",y_test, epa_pca_y_pred)

In [None]:
# Load the EPA MPG data set directly from the CSV hosted on GitHub (requires network access).
epa = pd.read_csv('https://raw.githubusercontent.com/sqlshep/SQLShepBlog/master/data/epaMpg.csv')

In [None]:
# Preview the first 10 rows of the raw frame.
epa.head(10)

In [None]:
# (rows, columns) of the frame as loaded.
epa.shape

In [None]:
# NOTE: every drop below is positional and reassigns `epa`, so this cell is not
# idempotent -- re-run the cell that loads the CSV before running it again.

# Drop the row number (the first column of the CSV).
epa = epa.drop(epa.columns[[0]], axis=1)

# Replace the "." in the column names with "_".
# regex=False makes the dot a literal character on every pandas version; the
# default value of `regex` changed between pandas 1.x and 2.x, and as a regular
# expression "." would match any character.
epa.columns = epa.columns.str.replace('.', '_', regex=False)

# Drop the first three columns.
epa = epa.drop(epa.columns[[0, 1, 2]], axis=1)

# Drop description columns (positions are relative to the frame after the drops above).
epa = epa.drop(epa.columns[[3, 9, 11]], axis=1)
epa


In [None]:
# Mark the two code columns as categorical in a single astype call so that
# pd.get_dummies one-hot encodes them instead of treating them as numbers.
epa = epa.astype({
    'Tested_Transmission_Type_Code': 'category',
    'Drive_System_Code': 'category',
})

In [None]:
# One-hot encode: pd.get_dummies expands the category-typed columns (and any
# object/string-typed ones) into indicator columns; other columns pass through.
epa = pd.get_dummies(epa)

In [None]:
# Feature matrix for scikit-learn: every variable except the label being predicted (FuelEcon).
epa_X = epa.loc[:, epa.columns != 'FuelEcon']
#epa_X = epa.iloc[:, epa.columns =='Weight']
epa_X



In [None]:
# Label frame: only the FuelEcon column, kept two-dimensional (a DataFrame, not a Series).
epa_y = epa.loc[:, epa.columns == 'FuelEcon']

In [None]:
# Split into training (80%) and test (20%) sets.
# random_state pins the shuffle so that the reported metrics, and the specific
# X_test rows sent to the deployed service later, are the same on every
# Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(epa_X, epa_y, test_size=0.20, random_state=42)

In [None]:
# random_state makes the fitted forest (and therefore its scores) reproducible.
epa_forest = RandomForestRegressor(random_state=42)

# y_train is a one-column DataFrame; flatten it to shape (n_samples,) so that
# scikit-learn does not emit its "column-vector y was passed" DataConversionWarning.
epa_forest.fit(X_train, np.ravel(y_train))

In [None]:
# Predict the label for the held-out test features.
forest_y_hat = epa_forest.predict(X_test)

In [None]:
# Add this model's MSE / RMSE / R2 to the shared metrics table and display the table.
metrics("RandomForestRegressor",y_test, forest_y_hat)

In [None]:
# Persist the fitted forest to a local pickle file; the same file name is passed
# as model_path when the model is registered with Azure ML further down.
joblib.dump(epa_forest, 'EPA_sklearn_regression_model.pkl')

In [None]:
# numpy is already imported in the first cell; repeating the import here is harmless.
import numpy as np

from azureml.core import Dataset


# Write the full feature and label frames to local CSV files.
# np.savetxt is called without a `header` argument, so no column names are written.
np.savetxt('EPA_features.csv', epa_X, delimiter=',')
np.savetxt('EPA_labels.csv', epa_y, delimiter=',')

# Upload both files to the workspace's default datastore, replacing earlier copies.
datastore = ws.get_default_datastore()
datastore.upload_files(files=['./EPA_features.csv', './EPA_labels.csv'],
                       target_path='EPA_sklearn_regression/',
                       overwrite=True)

# Wrap the uploaded files as tabular datasets; they are attached to the model
# registration below as sample input / sample output.
# NOTE(review): the CSVs have no header row -- confirm how from_delimited_files
# treats the first data row with its default header setting.
input_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'EPA_sklearn_regression/EPA_features.csv')])
output_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'EPA_sklearn_regression/EPA_labels.csv')])

In [None]:
# sklearn is imported as a module only to read its version string below.
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration


# Upload the local pickle and register it in the workspace, attaching the
# sample datasets created in the previous cell and the requested resources.
model = Model.register(workspace=ws,
                       model_name='EPA-sklearn-model-R-Forest',                # Name of the registered model in your workspace.
                       model_path='./EPA_sklearn_regression_model.pkl',  # Local file to upload and register as a model.
                       model_framework=Model.Framework.SCIKITLEARN,  # Framework used to create the model.
                       model_framework_version=sklearn.__version__,  # Version of scikit-learn used to create the model.
                       sample_input_dataset=input_dataset,
                       sample_output_dataset=output_dataset,
                       resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),
                       description='SKLearn Random Forest to predict EPA MPG.',
                       tags={'area': 'EPA', 'type': 'regression'})

# Confirm what was registered.
print('Name:', model.name)
print('Version:', model.version)

In [None]:
# Name under which the web service is created in the workspace.
service_name = 'epa-sklearn-forest-service'

# Deploy the registered model as a web service; overwrite=True replaces an
# existing service with the same name.
# NOTE(review): no inference or deployment configuration is passed here --
# presumably this relies on Azure ML's no-code deployment for scikit-learn
# models; confirm.
service = Model.deploy(ws, service_name, [model], overwrite=True)
# Block until the deployment finishes, streaming its progress output.
service.wait_for_deployment(show_output=True)

In [None]:
# Preview the first test row as a nested Python list -- the same shape of data
# that is placed under 'data' in the scoring request below.
X_test[0:1].to_numpy().tolist()

In [None]:
import json

# Build the scoring request from a single held-out row (position 64 of X_test).
request_body = {
    'data': X_test.iloc[64:65].to_numpy().tolist(),
    'method': 'predict'  # If you have a classification model, you can get probabilities by changing this to 'predict_proba'.
}
input_payload = json.dumps(request_body)

# Send the request to the deployed web service and show its response.
output = service.run(input_payload)
print(output)

In [None]:
# Uncomment to delete the deployed web service when you are finished with it.
#service.delete()