In [13]:
import mlflow, prefect
import warnings, git, hashlib, os
from prefect import flow, task, get_run_logger, unmapped
from prefect.task_runners import SequentialTaskRunner

In [14]:
# Github config
#
# Record the branch name and commit SHA the notebook runs from; both are
# concatenated into the `version` of the `evaluate` flow further down.

try:
    repo = git.Repo(search_parent_directories=True)
    branch = repo.active_branch.name
    sha = repo.head.commit.hexsha
except Exception as exc:
    # Best-effort lookup: any failure while reading the repository falls back
    # to placeholder values instead of aborting the notebook. The cause is
    # included in the warning so it is not silently lost.
    warnings.warn(f'No github repository! Generating new SHA. ({exc!r})')
    # `branch` must always be bound, because the flow version string needs it.
    branch = 'unknown'
    # Random 40-character hex digest, the same shape as a git commit SHA.
    sha = hashlib.sha1(os.urandom(32)).hexdigest()

In [15]:
# Mlflow config
#
# Select the experiment every run in this notebook is logged under and
# collect the keyword arguments intended for `mlflow.start_run`.

name = 'Best Classifier'
# `set_experiment` activates the named experiment and returns it; per the
# MLflow documentation it also creates the experiment when it does not exist,
# so no separate lookup/create step is needed.
experiment = mlflow.set_experiment(name)
# Always bound, not only when the experiment was freshly created.
experiment_id = experiment.experiment_id
run_params = {"experiment_id" : experiment_id,
              "description" : "Testing different classifiers for articial data with prefect and mlflow.",
              "tags" : {'release.version':'0.0.1'}}

In [16]:
# Show the key properties of the active MLflow experiment, one per line.
_experiment_lines = (
    "Experiment ID: {}".format(experiment.experiment_id),
    "Artifact Location: {}".format(experiment.artifact_location),
    "Tags: {}".format(experiment.tags),
    "Lifecycle stage: {}".format(experiment.lifecycle_stage),
)
print(*_experiment_lines, sep="\n")

Experiment ID: 1
Artifact Location: file:///home/lpfgarcia/Desktop/practical-prefect/mlruns/1
Tags: {}
Lifecycle stage: active


In [20]:
from sklearn.datasets import make_classification

@task
def generate_data(n_samples, n_features):
    """Build a synthetic classification dataset.

    Calls ``make_classification`` with two informative features, two
    redundant features and a fixed ``random_state`` of 42.

    Args:
        n_samples: Number of samples, forwarded as the first positional argument.
        n_features: Number of features, forwarded as the second positional argument.

    Returns:
        The result of ``make_classification``; the caller in this notebook
        unpacks it as ``(X, y)``.
    """
    dataset_options = dict(n_informative=2, n_redundant=2, random_state=42)
    return make_classification(n_samples, n_features, **dataset_options)

In [21]:
from sklearn.model_selection import train_test_split

@task
def split_data(X, y):
    """Split a dataset into train and test parts without shuffling.

    The test part holds ``int(len(X) * 0.1)`` samples, i.e. a tenth of the
    data rounded down.

    Args:
        X: Feature data; must support ``len``.
        y: Targets aligned with ``X``.

    Returns:
        The result of ``train_test_split``; the caller in this notebook
        unpacks it as ``X_train, X_test, y_train, y_test``.
    """
    holdout_size = int(len(X) * 0.1)
    return train_test_split(X, y, test_size=holdout_size, shuffle=False)

In [22]:
@flow
def generate_split(n_samples, n_features):
    """Generate a synthetic dataset and split it into train/test parts.

    Args:
        n_samples: Number of samples passed to ``generate_data``.
        n_features: Number of features passed to ``generate_data``.

    Returns:
        Whatever ``split_data`` returns for the generated data.
    """
    dataset = generate_data(n_samples, n_features)
    inputs, targets = dataset
    return split_data(inputs, targets)

In [23]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB

def classifiers(random_state=None):
    """Return the candidate classifiers paired with display names.

    Args:
        random_state: Seed forwarded to the estimators that use randomness
            while fitting (decision tree, random forest, AdaBoost). The
            default ``None`` leaves them unseeded, so calling
            ``classifiers()`` with no argument builds the same estimators
            as before.

    Returns:
        A list of ``(estimator, name)`` tuples. The estimators are unfitted;
        the name is what gets logged to MLflow for that estimator.
    """
    return [
        (KNeighborsClassifier(3), "Nearest Neighbors"),
        (SVC(kernel="linear", C=0.025), "Linear SVM"),
        (DecisionTreeClassifier(max_depth=5, random_state=random_state), "Decision Tree"),
        (
            RandomForestClassifier(
                max_depth=5,
                n_estimators=10,
                max_features=1,
                random_state=random_state,
            ),
            "Random Forest",
        ),
        (AdaBoostClassifier(random_state=random_state), "AdaBoost"),
        (GaussianNB(), "Naive Bayes"),
    ]

In [29]:
from sklearn.metrics import accuracy_score

@flow
def evaluate_classifier(clf, samples, features):
    """Fit one classifier on freshly generated data and log it to MLflow.

    Args:
        clf: An ``(estimator, name)`` pair as produced by ``classifiers()``.
        samples: Number of samples forwarded to ``generate_split``.
        features: Number of features forwarded to ``generate_split``.

    Returns:
        A tuple ``(model, y_pred, acc)``: the fitted estimator, its
        predictions on the test part and the accuracy of those predictions.
    """
    split = generate_split(samples, features)
    X_train, X_test, y_train, y_test = split

    logger = get_run_logger()
    # Kept under a name other than `flow` so the imported decorator is not
    # shadowed inside this function.
    run_context = prefect.context.get_run_context()
    flow_run = run_context.flow_run
    # Ties the MLflow run back to the Prefect flow run that produced it.
    description = ' '.join((
        'prefect_flow_name', flow_run.name,
        'prefect_flow_id', flow_run.id.urn,
    ))

    with mlflow.start_run(description=description):
        label = clf[1]
        estimator = clf[0]

        mlflow.log_param('name', label)
        mlflow.log_params(estimator.get_params())

        model = estimator.fit(X_train, y_train)
        mlflow.sklearn.log_model(sk_model=model, artifact_path=label)

        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)

        mlflow.log_metric('accuracy', acc)
        logger.info(f'accuracy {acc}')

    return model, y_pred, acc

In [31]:
@flow(
    description='Evaluating different classifier for each dataset.',
    version=' '.join((branch, sha)),
)
def evaluate(clf, samples, features):
    """Run ``evaluate_classifier`` once and log the resulting accuracy.

    Args:
        clf: An ``(estimator, name)`` pair, forwarded unchanged.
        samples: Number of samples, forwarded unchanged.
        features: Number of features, forwarded unchanged.
    """
    logger = get_run_logger()
    # Only the accuracy is used here; the model and predictions are discarded.
    _model, _y_pred, acc = evaluate_classifier(clf, samples, features)
    logger.info(f'Accuracy of {acc}')

In [32]:
# Evaluate the first classifier on every (sample count, feature count) pair.
clf_list = classifiers()
samples = range(10000, 80000, 10000)
features = range(20, 30, 2)

combination = [(x, y) for x in samples for y in features]
# The loop targets get their own names so the `samples` and `features` ranges
# above are not overwritten with plain integers by the loop.
for n_samples, n_features in combination:
    evaluate(clf_list[0], n_samples, n_features)

20:16:00.295 | INFO    | prefect.engine - Created flow run 'true-boar' for flow 'evaluate'

 `@task(name='my_unique_name', ...)`
20:16:03.955 | INFO    | Flow run 'true-boar' - Created subflow run 'cyan-teal' for flow 'evaluate-classifier'
20:16:06.362 | INFO    | Flow run 'cyan-teal' - Created subflow run 'astonishing-trout' for flow 'generate-split'
20:16:07.564 | INFO    | Flow run 'astonishing-trout' - Created task run 'generate_data-eeb4694a-0' for task 'generate_data'
20:16:07.565 | INFO    | Flow run 'astonishing-trout' - Executing 'generate_data-eeb4694a-0' immediately...
20:16:08.809 | INFO    | Task run 'generate_data-eeb4694a-0' - Finished in state Completed()
20:16:09.393 | INFO    | Flow run 'astonishing-trout' - Created task run 'split_data-b2f518fa-0' for task 'split_data'
20:16:09.394 | INFO    | Flow run 'astonishing-trout' - Executing 'split_data-b2f518fa-0' immediately...
20:16:10.588 | INFO    | Task run 'split_data-b2f518fa-0' - Finished in state Completed()
20:16:

20:17:08.434 | INFO    | Flow run 'angelic-viper' - Created subflow run 'loud-husky' for flow 'evaluate-classifier'

 `@task(name='my_unique_name', ...)`
20:17:10.311 | INFO    | Flow run 'loud-husky' - Created subflow run 'talented-toucan' for flow 'generate-split'
20:17:11.515 | INFO    | Flow run 'talented-toucan' - Created task run 'generate_data-eeb4694a-0' for task 'generate_data'
20:17:11.515 | INFO    | Flow run 'talented-toucan' - Executing 'generate_data-eeb4694a-0' immediately...
20:17:13.872 | INFO    | Task run 'generate_data-eeb4694a-0' - Finished in state Completed()
20:17:14.463 | INFO    | Flow run 'talented-toucan' - Created task run 'split_data-b2f518fa-0' for task 'split_data'
20:17:14.464 | INFO    | Flow run 'talented-toucan' - Executing 'split_data-b2f518fa-0' immediately...
20:17:15.684 | INFO    | Task run 'split_data-b2f518fa-0' - Finished in state Completed()
20:17:16.337 | INFO    | Flow run 'talented-toucan' - Finished in state Completed()
20:17:17.947 | IN

20:17:30.431 | ERROR   | Flow run 'smooth-serval' - Crash detected! Execution was interrupted by an unexpected exception.
20:17:31.106 | ERROR   | Flow run 'perfect-dormouse' - Encountered exception during execution:
Traceback (most recent call last):
  File "/home/lpfgarcia/Desktop/practical-prefect/lib/python3.9/site-packages/prefect/engine.py", line 596, in orchestrate_flow_run
    result = await run_sync(flow_call)
  File "/home/lpfgarcia/Desktop/practical-prefect/lib/python3.9/site-packages/prefect/utilities/asyncutils.py", line 68, in run_sync_in_worker_thread
    return await anyio.to_thread.run_sync(call, cancellable=True)
  File "/home/lpfgarcia/Desktop/practical-prefect/lib/python3.9/site-packages/anyio/to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/home/lpfgarcia/Desktop/practical-prefect/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "/ho

20:17:31.681 | ERROR   | Flow run 'perfect-dormouse' - Finished in state Failed('Flow run encountered an exception.')
20:17:31.682 | ERROR   | Flow run 'ludicrous-copperhead' - Encountered exception during execution:
Traceback (most recent call last):
  File "/home/lpfgarcia/Desktop/practical-prefect/lib/python3.9/site-packages/prefect/engine.py", line 596, in orchestrate_flow_run
    result = await run_sync(flow_call)
  File "/home/lpfgarcia/Desktop/practical-prefect/lib/python3.9/site-packages/prefect/utilities/asyncutils.py", line 68, in run_sync_in_worker_thread
    return await anyio.to_thread.run_sync(call, cancellable=True)
  File "/home/lpfgarcia/Desktop/practical-prefect/lib/python3.9/site-packages/anyio/to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/home/lpfgarcia/Desktop/practical-prefect/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "/ho

20:17:31.717 | ERROR   | Flow run 'ludicrous-copperhead' - Finished in state Failed('Flow run encountered an exception.')


OperationalError: (sqlite3.OperationalError) database is locked
[SQL: PRAGMA journal_mode = WAL;]
(Background on this error at: https://sqlalche.me/e/14/e3q8)