In [1]:
import shutil

import mlflow
import mlflow.sklearn
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the Iris dataset; later cells read its .data, .target, .target_names
# and .feature_names fields.
data = load_iris()

In [3]:
# Show which fields the dataset object exposes rather than dumping the whole
# object (full feature matrix plus the long description text) into the output.
list(data.keys())

In [4]:
# Label array; this is what gets bound to `y` further down.
data["target"]

In [5]:
# Class names that go with the label array.
data["target_names"]

In [6]:
# Names of the feature columns in data.data.
data["feature_names"]

In [7]:
# Feature matrix and label vector used for the train/test split.
X, y = data.data, data.target

In [8]:
# Hold out 25% of the samples for evaluation; the fixed seed keeps the split
# identical from one execution to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=10,
    test_size=0.25,
)

In [9]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- import registers the "3d" projection on older matplotlib
from sklearn.decomposition import PCA

# Use dedicated names for the plotting inputs. Rebinding the global X here
# would replace the 4-feature matrix with a 2-column slice, so re-running the
# train/test split cell afterwards would silently train on 2 features only.
sepal_xy = data.data[:, :2]
species = data.target

# Axis limits: data range padded by half a unit on each side.
x_min, x_max = sepal_xy[:, 0].min() - .5, sepal_xy[:, 0].max() + .5
y_min, y_max = sepal_xy[:, 1].min() - .5, sepal_xy[:, 1].max() + .5
plt.figure(2, figsize=(8, 6))
plt.clf()

# Figure 2: 2-D scatter of the first two features, coloured by class.
plt.scatter(sepal_xy[:, 0], sepal_xy[:, 1], c=species, cmap=plt.cm.Set1,
            edgecolor='k')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())

# Figure 1: 3-D scatter of the first three principal components of all features.
# Create the axes through the figure so they are actually attached to it;
# constructing Axes3D(fig, ...) directly no longer does that on current matplotlib.
fig = plt.figure(1, figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d', elev=-150, azim=110)
X_reduced = PCA(n_components=3).fit_transform(data.data)
ax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=species,
           cmap=plt.cm.Set1, edgecolor='k', s=40)
ax.set_title("First three PCA directions")
# Use xaxis/yaxis/zaxis: the w_xaxis/w_yaxis/w_zaxis aliases were removed
# from recent matplotlib releases.
ax.set_xlabel("1st eigenvector")
ax.xaxis.set_ticklabels([])
ax.set_ylabel("2nd eigenvector")
ax.yaxis.set_ticklabels([])
ax.set_zlabel("3rd eigenvector")
ax.zaxis.set_ticklabels([])

# Persist the 3-D figure; a later cell logs iris1.png as an MLflow artifact.
fig.savefig('iris1.png')
plt.close(fig)

# NOTE(review): bare display() presumably relies on the notebook environment
# rendering the remaining open figure (figure 2) -- confirm on the target runtime.
display()

In [10]:
# Remove models saved by a previous execution so the training cells below can
# write to the same paths again. ignore_errors=True keeps a fresh run (where
# the directory does not exist yet) from reporting an error.
shutil.rmtree("/dbfs/mlflow/iris", ignore_errors=True)

In [11]:
# Run 1: decision tree with default depth.
with mlflow.start_run():
    # One dict drives both the estimator and the logged parameters, so the
    # tracked values cannot drift from what was actually trained.
    params = {"random_state": 10}

    dtc = DecisionTreeClassifier(**params)
    dtc.fit(X_train, y_train)
    y_pred_class = dtc.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred_class)

    print(accuracy)

    for name, value in params.items():
        mlflow.log_param(name, value)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.sklearn.log_model(dtc, "model")

    # %d instead of %f: the run index is an integer, and %f rendered it as
    # "1.000000" in the directory name.
    modelpath = "/dbfs/mlflow/iris/model-%s-%d" % ("decision_tree", 1)
    mlflow.sklearn.save_model(dtc, modelpath)

    # Attach the PCA figure written by the plotting cell to this run.
    mlflow.log_artifact("iris1.png")

In [12]:
# Run 2: decision tree limited to a single split level.
with mlflow.start_run():
    # One dict drives both the estimator and the logged parameters, so the
    # tracked values cannot drift from what was actually trained.
    params = {"random_state": 10, "max_depth": 1}

    dtc = DecisionTreeClassifier(**params)
    dtc.fit(X_train, y_train)
    y_pred_class = dtc.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred_class)

    print(accuracy)

    for name, value in params.items():
        mlflow.log_param(name, value)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.sklearn.log_model(dtc, "model")

    # %d instead of %f: the run index is an integer, and %f rendered it as
    # "2.000000" in the directory name.
    modelpath = "/dbfs/mlflow/iris/model-%s-%d" % ("decision_tree", 2)
    mlflow.sklearn.save_model(dtc, modelpath)

In [13]:
# Run 3: depth-1 decision tree that also requires 5 samples to split a node.
with mlflow.start_run():
    # One dict drives both the estimator and the logged parameters, so the
    # tracked values cannot drift from what was actually trained.
    params = {"random_state": 10, "max_depth": 1, "min_samples_split": 5}

    dtc = DecisionTreeClassifier(**params)
    dtc.fit(X_train, y_train)
    y_pred_class = dtc.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred_class)

    print(accuracy)

    for name, value in params.items():
        mlflow.log_param(name, value)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.sklearn.log_model(dtc, "model")

    # %d instead of %f: the run index is an integer, and %f rendered it as
    # "3.000000" in the directory name.
    modelpath = "/dbfs/mlflow/iris/model-%s-%d" % ("decision_tree", 3)
    mlflow.sklearn.save_model(dtc, modelpath)


In [14]:
# Run 4: k-nearest-neighbours classifier with k=5.
with mlflow.start_run():
    # One dict drives both the estimator and the logged parameters, so the
    # tracked values cannot drift from what was actually trained.
    params = {"n_neighbors": 5}

    knn = KNeighborsClassifier(**params)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print(accuracy)

    for name, value in params.items():
        mlflow.log_param(name, value)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.sklearn.log_model(knn, "model")

    # %d instead of %f: the run index is an integer, and %f rendered it as
    # "4.000000" in the directory name.
    modelpath = "/dbfs/mlflow/iris/model-%s-%d" % ("KNN", 4)
    mlflow.sklearn.save_model(knn, modelpath)

In [15]:
# Run 5: k-nearest-neighbours classifier with k=2.
with mlflow.start_run():
    # One dict drives both the estimator and the logged parameters, so the
    # tracked values cannot drift from what was actually trained.
    params = {"n_neighbors": 2}

    knn = KNeighborsClassifier(**params)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print(accuracy)

    for name, value in params.items():
        mlflow.log_param(name, value)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.sklearn.log_model(knn, "model")

    # %d instead of %f: the run index is an integer, and %f rendered it as
    # "5.000000" in the directory name.
    modelpath = "/dbfs/mlflow/iris/model-%s-%d" % ("KNN", 5)
    mlflow.sklearn.save_model(knn, modelpath)

In [16]:
# List the runs logged so far as a table: one row per run, with its
# metrics.* and params.* values alongside run metadata.
mlflow.search_runs()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.accuracy,params.n_neighbors,params.random_state,params.min_samples_split,params.max_depth,tags.mlflow.databricks.notebookID,tags.mlflow.user,tags.mlflow.databricks.notebookRevisionID,tags.mlflow.source.name,tags.mlflow.databricks.notebookPath,tags.mlflow.databricks.webappURL,tags.mlflow.source.type
0,cb4e9d588e7f477f88bde71e96d3de85,245898718684243,FINISHED,dbfs:/databricks/mlflow/245898718684243/cb4e9d...,2020-02-17 23:42:50.392000+00:00,2020-02-17 23:42:52.121000+00:00,0.947368,2.0,,,,245898718684243,srivatsan88@yahoo.com,1581982972203,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,https://community.cloud.databricks.com,NOTEBOOK
1,ad87f2680e3c40a0bbb73e5afeb39d7e,245898718684243,FINISHED,dbfs:/databricks/mlflow/245898718684243/ad87f2...,2020-02-17 23:42:44.248000+00:00,2020-02-17 23:42:45.995000+00:00,0.973684,5.0,,,,245898718684243,srivatsan88@yahoo.com,1581982966076,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,https://community.cloud.databricks.com,NOTEBOOK
2,2edd59a10ee54b85bb27faa95eb22a73,245898718684243,FINISHED,dbfs:/databricks/mlflow/245898718684243/2edd59...,2020-02-17 23:42:23.008000+00:00,2020-02-17 23:42:24.817000+00:00,0.605263,,10.0,5.0,1.0,245898718684243,srivatsan88@yahoo.com,1581982944894,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,https://community.cloud.databricks.com,NOTEBOOK
3,025c3ed0c1bf4e23a35c09e0fa2a6221,245898718684243,FINISHED,dbfs:/databricks/mlflow/245898718684243/025c3e...,2020-02-17 23:42:08.613000+00:00,2020-02-17 23:42:10.478000+00:00,0.605263,,10.0,,1.0,245898718684243,srivatsan88@yahoo.com,1581982930557,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,https://community.cloud.databricks.com,NOTEBOOK
4,12cb8910f9b04166a9f174763d77b111,245898718684243,FINISHED,dbfs:/databricks/mlflow/245898718684243/12cb89...,2020-02-17 23:39:55.639000+00:00,2020-02-17 23:39:57.912000+00:00,0.973684,,10.0,,,245898718684243,srivatsan88@yahoo.com,1581982797990,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,/Users/srivatsan88@yahoo.com/MLFlow Tracking Demo,https://community.cloud.databricks.com,NOTEBOOK


In [17]:
# Select the KNN (n_neighbors=5) run by querying the tracking store instead of
# pasting a run ID: IDs are generated per execution, so a hard-coded one only
# resolves in the workspace it was copied from.
knn5_runs = mlflow.search_runs(
    filter_string="params.n_neighbors = '5'",
    order_by=["attributes.start_time DESC"],  # most recent matching run first
    max_results=1,
)
if knn5_runs.empty:
    raise RuntimeError(
        "No MLflow run with n_neighbors=5 was found; run the KNN training cell first."
    )

run_id1 = knn5_runs.iloc[0]["run_id"]
# "model" is the artifact path the training cells passed to log_model.
model_uri = "runs:/" + run_id1 + "/model"

In [18]:
# Load the scikit-learn model stored under the selected run's "model" artifact path.
model = mlflow.sklearn.load_model(model_uri=model_uri)

In [19]:
# Show the reloaded estimator's hyperparameters to confirm which model was fetched.
model.get_params()

In [20]:
# Per-class probability estimates from the reloaded model on the held-out test split.
model.predict_proba(X_test)