In [2]:
import pandas as pd
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
import mlflow, os
import mlflow.sklearn
from mlflow.tracking import MlflowClient
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split

# Point MLflow at the tracking server and name the experiment used for the run.
os.environ["MLFLOW_TRACKING_URI"] = "http://localhost:5000"
EXPERIMENT_NAME = "IRIS dataset classification"

# Load the iris dataset into a DataFrame: one column per feature plus the
# class label in a "target" column.
iris = datasets.load_iris()
y = iris.target
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=y)

print(iris_df.head())

# Stratified 70/30 split so both partitions keep the class proportions.
train_df, test_df = train_test_split(
    iris_df,
    test_size=0.3,
    random_state=42,
    stratify=iris_df["target"],
)

# Separate the feature columns from the label column in each partition.
feature_columns = [
    "sepal length (cm)",
    "sepal width (cm)",
    "petal length (cm)",
    "petal width (cm)",
]
X_train, y_train = train_df[feature_columns], train_df["target"]
X_test, y_test = test_df[feature_columns], test_df["target"]

# Report the shape of the features and labels of each partition.
for heading, features, labels in (
    ("IRIS train df shape", X_train, y_train),
    ("IRIS test df shape", X_test, y_test),
):
    print(heading)
    print(features.shape)
    print(labels.shape)

mlflow_client = MlflowClient()

# Reuse the experiment when one with this name already exists; otherwise
# create it and keep the id of the new experiment.
experiment_details = mlflow_client.get_experiment_by_name(EXPERIMENT_NAME)

if experiment_details is not None:
    experiment_id = experiment_details.experiment_id
else:
    experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)

# Single source of truth for the hyperparameters: the same dict is logged to
# MLflow and passed to the classifier, so the two cannot drift apart.
rf_params = {"max_depth": 10, "random_state": 0, "n_estimators": 100}

# Train a random forest inside an MLflow run, logging its parameters, test
# metrics, the deployment descriptor and the fitted model.
with mlflow.start_run(experiment_id=experiment_id, run_name="iris dataset rf run") as run:
    mlflow.log_params(rf_params)

    clf = RandomForestClassifier(**rf_params)
    clf.fit(X_train, y_train)
    iris_predict_y = clf.predict(X_test)

    # Multi-class ROC AUC is computed one-vs-rest from the class probabilities.
    roc_auc_score_val = roc_auc_score(y_test, clf.predict_proba(X_test), multi_class='ovr')
    mlflow.log_metric("test roc_auc_score", roc_auc_score_val)

    # Bind the result to its own name: assigning it to `accuracy_score` would
    # overwrite the imported sklearn function and break any later call to it
    # (e.g. re-running this code in the same session).
    accuracy_score_val = accuracy_score(y_test, iris_predict_y)
    mlflow.log_metric("test accuracy_score", accuracy_score_val)

    # Relative path: resolved against the current working directory.
    mlflow.log_artifact("deploy.yaml")

    # Log model
    mlflow.sklearn.log_model(clf, artifact_path="model")




   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  
IRIS train df shape
(105, 4)
(105,)
IRIS test df shape
(45, 4)
(45,)
