# Intro to MLflow autologging

Example links:

- sklearn: https://mlflow.org/docs/latest/python_api/mlflow.sklearn.html#mlflow.sklearn.autolog

- tensorflow: https://github.com/mlflow/mlflow/blob/master/examples/keras/train.py

- other examples: https://mlflow.org/docs/latest/tutorials-and-examples/index.html

In [2]:
import mlflow
!pip show mlflow # version mflow used

Name: mlflow
Version: 2.3.0
Summary: MLflow: A Platform for ML Development and Productionization
Home-page: https://mlflow.org/
Author: Databricks
Author-email: 
License: Apache License 2.0
Location: d:\anaconda\envs\data-science-python-3-10\lib\site-packages
Requires: alembic, click, cloudpickle, databricks-cli, docker, entrypoints, Flask, gitpython, importlib-metadata, Jinja2, markdown, matplotlib, numpy, packaging, pandas, protobuf, pyarrow, pytz, pyyaml, querystring-parser, requests, scikit-learn, scipy, sqlalchemy, sqlparse, waitress
Required-by: 


### 1. Autolog tensorflow

In [4]:
"""Trains and evaluate a simple MLP
on the Reuters newswire topic classification task.
"""
import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import reuters
from tensorflow.keras.layers import Activation, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer

# Compared with a plain Keras script, only the mlflow import and the autolog()
# call below are added; they switch on automatic logging to MLflow.
import mlflow

mlflow.tensorflow.autolog()

# Settings for vocabulary size and training.
max_words = 1000
batch_size = 32
epochs = 5

print("Loading data...")
(x_train, y_train), (x_test, y_test) = reuters.load_data(
    num_words=max_words,
    test_split=0.2,
)

print(len(x_train), "train sequences")
print(len(x_test), "test sequences")

# Labels are integer class ids, so the class count is the largest id plus one.
num_classes = np.max(y_train) + 1
print(num_classes, "classes")

print("Vectorizing sequence data...")
tokenizer = Tokenizer(num_words=max_words)
# Turn both splits of word-index sequences into binary matrices.
x_train, x_test = [
    tokenizer.sequences_to_matrix(sequences, mode="binary")
    for sequences in (x_train, x_test)
]
print("x_train shape:", x_train.shape)
print("x_test shape:", x_test.shape)

print("Convert class vector to binary class matrix (for use with categorical_crossentropy)")
y_train, y_test = [
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
]
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

print("Building model...")
# Same layer stack as before, passed to the constructor in one list.
model = Sequential(
    [
        Dense(512, input_shape=(max_words,)),
        Activation("relu"),
        Dropout(0.5),
        Dense(num_classes),
        Activation("softmax"),
    ]
)

model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Hold out 10% of the training data for validation during fitting.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_split=0.1,
)

# evaluate() returns [loss, accuracy] given the compile() settings above.
score = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1)
print("Test score:", score[0])
print("Test accuracy:", score[1])



Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
8982 train sequences
2246 test sequences
46 classes
Vectorizing sequence data...
x_train shape: (8982, 1000)
x_test shape: (2246, 1000)
Convert class vector to binary class matrix (for use with categorical_crossentropy)
y_train shape: (8982, 46)
y_test shape: (2246, 46)
Building model...


2023/12/24 19:13:25 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '102475bc70a64082ba4a5ee27e8f2845', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: C:\Users\JORTEGAL\AppData\Local\Temp\tmpbqq0l3cy\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\JORTEGAL\AppData\Local\Temp\tmpbqq0l3cy\model\data\model\assets


Test score: 0.8922085165977478
Test accuracy: 0.7902938723564148


### 2. Autolog sklearn
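Note: the cell below fails with the installed scikit-learn version (see the `AttributeError` about `sklearn.metrics.SCORERS` in its output); that scikit-learn release does not appear to be supported by MLflow 2.3.0.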

In [10]:
import numpy as np
from sklearn.linear_model import LogisticRegression

import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature

# Enable scikit-learn autologging before any estimator is fitted.
mlflow.sklearn.autolog()

if __name__ == "__main__":
    # Small single-feature toy dataset, reshaped to the (n_samples, 1) layout.
    X = np.array([-2, -1, 0, 1, 2, 1, 10, 12, 23]).reshape(-1, 1)
    y = np.array([0, 0, 1, 1, 1, 0, 0, 0, 0])

    # Open the run explicitly. Without an active run, autologging creates and
    # closes its own run around fit(), and the later log_model() call would
    # implicitly start a second run that is never ended. The context manager
    # keeps everything in one run and always closes it, even on error.
    with mlflow.start_run() as run:
        lr = LogisticRegression()
        lr.fit(X, y)

        score = lr.score(X, y)
        print(f"Score: {score}")
        # No explicit mlflow.log_metric() call: metric capture is left to
        # autologging.

        # Record the input/output schema alongside the explicitly logged model.
        predictions = lr.predict(X)
        signature = infer_signature(X, predictions)
        mlflow.sklearn.log_model(lr, "model", signature=signature)

        # run_id replaces the deprecated run_uuid attribute.
        print(f"Model saved in run {run.info.run_id}")



AttributeError: module 'sklearn.metrics' has no attribute 'SCORERS'