In [4]:
def train(in_n, in_criterion, in_max_depth, in_random_state=None):
    """Train a random forest on the breast-cancer dataset and log the run to MLflow.

    The dataset is loaded with ``load_breast_cancer`` and split 70/30 into
    training and test data using a fixed split seed (101). A
    ``RandomForestClassifier`` is fitted inside a nested MLflow run; the
    hyper-parameters, the test-set metrics, the fit time and the fitted model
    are logged to that run.

    Parameters
    ----------
    in_n
        Number of trees in the forest. ``None`` selects the default of 100.
    in_criterion
        Function used to measure the quality of a split. ``None`` selects
        ``'gini'``.
    in_max_depth
        Maximum depth of a tree. ``None`` is passed through unchanged to the
        classifier.
    in_random_state
        Optional seed forwarded to the classifier as ``random_state``. The
        default ``None`` leaves the forest unseeded, as before; when a value
        is given it is also logged as the ``random_state`` parameter.

    Returns
    -------
    None
        Everything of interest is recorded through MLflow: parameters ``n``,
        ``criterion`` and ``max depth``; metrics ``accuracy``, ``recall``,
        ``precision``, ``f1`` and ``elapsed time``; and the model under the
        artifact path ``model``.
    """
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier
    from sklearn import metrics
    import time

    import mlflow
    import mlflow.sklearn

    # Load the data and split it into features and target
    breast_cancer = load_breast_cancer()
    X = breast_cancer.data
    y = breast_cancer.target

    # Split data into test data and training data (fixed seed keeps the split stable)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.30, random_state=101
    )

    # n = number of trees in the forest.
    # The previous check `float(in_n) is None` could never be true (float()
    # never returns None) and raised TypeError for None, so the default was
    # unreachable; test the argument itself instead.
    n = 100 if in_n is None else in_n

    # Depth of a tree: None is itself a valid value for the classifier
    max_depth = in_max_depth

    # Function to measure the quality of the split
    criterion = 'gini' if in_criterion is None else in_criterion

    with mlflow.start_run(nested=True):

        # Create Random Forest model
        rfc = RandomForestClassifier(
            n_estimators=n,
            criterion=criterion,
            max_depth=max_depth,
            random_state=in_random_state,
        )

        # Time only the fit; perf_counter is monotonic, so the interval
        # cannot be distorted by system clock adjustments
        start_time = time.perf_counter()
        rfc.fit(X_train, y_train)
        elapsed_time = time.perf_counter() - start_time

        # Predict on the held-out test data
        predictions = rfc.predict(X_test)

        # Compare predictions to the actual labels
        accuracy = metrics.accuracy_score(y_test, predictions)
        precision = metrics.precision_score(y_test, predictions)
        recall = metrics.recall_score(y_true=y_test, y_pred=predictions)
        f1 = metrics.f1_score(y_true=y_test, y_pred=predictions)

        # Log parameters, metrics, and model to MLflow
        mlflow.log_param("n", n)
        mlflow.log_param("criterion", criterion)
        mlflow.log_param("max depth", max_depth)
        if in_random_state is not None:
            mlflow.log_param("random_state", in_random_state)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric('f1', f1)
        mlflow.log_metric('elapsed time', elapsed_time)

        # Log the model
        mlflow.sklearn.log_model(rfc, "model")

In [2]:
# Hyper-parameter grid for the random-forest sweep.
# Number of trees in the forest
n_list = [
    2, 5, 10, 20,
    50, 100, 500, 1000,
]
# Split-quality criteria to compare
criterion_list = ['gini', 'entropy']
# Maximum tree depths to try
max_depth_list = [
    2, 3, 5,
    10, 20, 100,
]

In [5]:
# Exhaustive sweep: one training run per (trees, criterion, depth) combination
for n_trees in n_list:
    for split_criterion in criterion_list:
        for depth in max_depth_list:
            train(
                in_n=n_trees,
                in_criterion=split_criterion,
                in_max_depth=depth,
            )

NameError: name 'X' is not defined