https://learn.microsoft.com/en-us/azure/machine-learning/how-to-setup-authentication?view=azureml-api-2&tabs=sdk
    

In [None]:
import time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

import itertools
import random
import string
import json
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

import pickle

import mlflow
import mlflow.keras
from mlflow.deployments import get_deploy_client

import math 
import warnings


In [None]:

# Blob SAS URL of the source CSV; replace the placeholder below before running.
url ='<Blob SAS URL >'

# Load the CSV into a DataFrame and preview its first rows.
epa = pd.read_csv(url)
epa.head()


In [None]:
# Show the NumPy version available in this kernel.
print(np.__version__)

In [None]:
from azureml.core import Workspace


# Load the Azure ML workspace handle via Workspace.from_config().
# NOTE(review): presumably this reads a local workspace config file — confirm
# it is present where the notebook runs.
ws = Workspace.from_config()
#print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

In [None]:
# Running scoreboard: one row of regression metrics per evaluated model.
model_metrics = pd.DataFrame(columns=["Model", "MSE", "RMSE", "R2"], dtype=object)


def metrics(model, y, y_hat):
    """Record MSE, RMSE and R2 for one model in ``model_metrics``.

    Args:
        model: Label stored in the "Model" column.
        y: Ground-truth target values.
        y_hat: Predicted target values.

    Returns:
        The module-level ``model_metrics`` frame, including the new row.
    """
    mse = mean_squared_error(y, y_hat)
    row = {
        "Model": model,
        "MSE": mse,
        "RMSE": math.sqrt(mse),
        "R2": r2_score(y, y_hat),
    }

    # Insert under the temporary label -1, then shift every label up by one
    # so the index stays non-negative after each call.
    model_metrics.loc[-1] = row
    model_metrics.index = model_metrics.index + 1
    return model_metrics

#metrics("PCA Forest",y_test, epa_pca_y_pred)

In [None]:
# Drop the leading row-number column.
epa = epa.drop(epa.columns[[0]], axis=1)

# Replace the "." in the column names with "_".
# regex=False makes "." a literal dot; interpreted as a regular expression it
# would match every character, and the default for `regex` differs between
# pandas versions.
epa.columns = epa.columns.str.replace('.', '_', regex=False)

# Drop the first three remaining columns.
epa = epa.drop(epa.columns[[0, 1, 2]], axis=1)

# Drop the description columns (positions 3, 9 and 11 of what is left).
epa = epa.drop(epa.columns[[3, 9, 11]], axis=1)
epa


In [None]:
# Mark the coded columns as categoricals (both casts are independent).
for code_column in ("Tested_Transmission_Type_Code", "Drive_System_Code"):
    epa[code_column] = epa[code_column].astype("category")

In [None]:
# One-hot encode the categorical columns; the result replaces `epa`.
epa = pd.get_dummies(epa)

In [None]:
# Feature matrix for scikit-learn: every column except the label being
# predicted ('FuelEcon').
is_feature = epa.columns != "FuelEcon"
epa_X = epa.iloc[:, is_feature]
# Single-feature alternative: epa_X = epa.iloc[:, epa.columns == 'Weight']
epa_X



In [None]:
# Label frame: only the 'FuelEcon' column, selected with a boolean column mask.
is_label = epa.columns == "FuelEcon"
epa_y = epa.iloc[:, is_label]

In [None]:
# Split into training and test sets, holding out 20% of the rows for testing.
# NOTE(review): no random_state is passed, so the split presumably differs
# from run to run — confirm that is acceptable for comparing metrics.
X_train, X_test, y_train, y_test = train_test_split(epa_X, epa_y, test_size=0.20)

In [None]:
# Select the MLflow experiment, turn on scikit-learn autologging and open a
# run. The run is left open here and closed by mlflow.end_run() in a later
# cell, so the training cell in between executes inside it.
experiment_name="EPA_RandomForest"
mlflow.set_experiment(experiment_name=experiment_name)
mlflow.sklearn.autolog()
run = mlflow.start_run()

In [None]:
# Train a random forest regressor with default hyperparameters.
epa_forest = RandomForestRegressor()
# y_train is a single-column DataFrame; flatten it to 1-D so fit() receives
# the (n_samples,) target shape it expects instead of a column vector.
epa_forest.fit(X_train, np.ravel(y_train))

In [None]:
# Predict the target for the held-out test features.
forest_y_hat = epa_forest.predict(X_test)

In [None]:
# Add the random forest's test-set scores to the metrics table.
metrics("RandomForestRegressor",y_test, forest_y_hat)

In [None]:
# Close the active MLflow run.
mlflow.end_run()

In [None]:
# Re-fetch the run by id and show its recorded parameters as a table with a
# single "Value" column (one row per parameter after the transpose).
run = mlflow.get_run(run.info.run_id)
pd.DataFrame(data=[run.data.params], index=["Value"]).T

In [None]:
# Look up the experiment and fetch its most recently started run.
exp = mlflow.get_experiment_by_name(experiment_name)
# Request the newest run explicitly (start time descending, one result)
# rather than depending on the default result order and indexing from the end.
last_run = mlflow.search_runs(
    experiment_ids=[exp.experiment_id],
    order_by=["attributes.start_time DESC"],
    max_results=1,
    output_format="list",
)[0]
print(last_run.info.run_id)

In [None]:
# Register the selected run's model artifact under `model_name`.
model_name = "EPA_RandomForest"
# Path of the model inside the run's artifacts.
# NOTE(review): assumes the model was logged under "model" — confirm.
artifact_path = "model"

mlflow.register_model(f"runs:/{last_run.info.run_id}/{artifact_path}", model_name)

In [None]:
# Build a unique endpoint name by appending a random five-character suffix
# drawn from lowercase letters and digits.
allowed_chars = string.ascii_lowercase + string.digits
endpoint_suffix = "".join([random.choice(allowed_chars) for _ in range(5)])
endpoint_name = f"EPA-RandomForest-{endpoint_suffix}"

print(f"Endpoint name: {endpoint_name}")

In [None]:
# Create a deployment client for the current MLflow tracking URI.
deployment_client = get_deploy_client(mlflow.get_tracking_uri())

In [None]:
# Create the endpoint under the generated name.
endpoint = deployment_client.create_endpoint(endpoint_name)

In [None]:
# Look up the endpoint and print the scoring URI found in its properties.
scoring_uri = deployment_client.get_endpoint(endpoint=endpoint_name)["properties"][
    "scoringUri"
]
print(scoring_uri)

In [None]:
# Deployment settings are written to a JSON file because the deployment
# call is handed a config file path rather than the dict itself.
deployment_name = "default"
deployment_config_path = "deployment_config.json"

deploy_config = dict(
    instance_type="Standard_DS3_v2",
    instance_count=1,
    failure_threshold=60,
)

with open(deployment_config_path, "w") as outfile:
    outfile.write(json.dumps(deploy_config))

In [None]:
# Registered model version to deploy.
# NOTE(review): hard-coded to 1; if the registration cell has been run more
# than once, the newest registered version is presumably higher — confirm
# this is the version that should be deployed.
version = 1

# Create the deployment on the endpoint from the registered model, using the
# settings file written earlier.
deployment = deployment_client.create_deployment(
    name=deployment_name,
    endpoint=endpoint_name,
    model_uri=f"models:/{model_name}/{version}",
    config={"deploy-config-file": deployment_config_path},
)

In [None]:
# Traffic split: assign 100 (presumably percent — confirm) to the deployment.
traffic_config = {"traffic": {deployment_name: 100}}


In [None]:
# Write the traffic settings to a JSON file; the endpoint update is given
# this file's path.
traffic_config_path = "traffic_config.json"
with open(traffic_config_path, "w") as outfile:
    outfile.write(json.dumps(traffic_config))
    

In [None]:
# Apply the traffic settings file to the endpoint.
deployment_client.update_endpoint(
    endpoint=endpoint_name,
    config={"endpoint-config-file": traffic_config_path},
)

In [None]:
#deployment_client.delete_deployment(endpoint_name)