# MLFLOW

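Start the MLflow tracking server (UI on port 5000) from the notebook's directory, pointing it at the same SQLite backend store this notebook logs to:

```bash
mlflow server --backend-store-uri sqlite:///mlrunsdb.db --default-artifact-root ./mlflowruns --host 0.0.0.0 --port 5000
```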

In [12]:
import joblib
import matplotlib.pyplot as plt
import mlflow
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score)
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [13]:
# Columns kept for modelling; 'Consumo' is the column later turned into the target.
feat_import = [
    'Peso',
    'TipoMotor',
    'Cilindros',
    'HP',
    'Tempo',
    'Consumo',
]

# Read the semicolon-separated file and keep only the selected columns.
# NOTE(review): the X_train preview in this notebook shows 'Peso' values such as
# 373 next to 5345 — possibly decimal separators lost in the CSV; confirm the data.
df = pd.read_csv("carros.csv", sep=";")[feat_import]

In [14]:
# Turn the continuous 'Consumo' column into three quantile-based classes
# labelled 0, 1 and 2 (q=3 bins).
#
# The previous `df.drop(columns='Consumo')` call was removed: its result was
# discarded, so it had no effect, and the column must stay for pd.qcut below.
#
# NOTE: this overwrites the raw 'Consumo' values in place, so re-running the
# cell without reloading `df` would bin the class labels, not the raw values.
lb = [0, 1, 2]
df['Consumo'] = pd.qcut(df['Consumo'], q=3, labels=lb)

In [15]:
# Target is the binned 'Consumo' column; every other column is a feature.
y = df['Consumo']
X = df.drop(columns='Consumo')

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [16]:
# Display the training features produced by the split (cell output below).
X_train

Unnamed: 0,Peso,TipoMotor,Cilindros,HP,Tempo
12,373,0,8,180,176
0,262,0,6,110,1646
4,344,0,8,175,1702
16,5345,0,8,230,1742
5,346,1,6,105,2022
13,378,0,8,180,18
11,407,0,8,180,174
23,384,0,8,245,1541
1,2875,0,6,110,1702
2,232,1,4,93,1861


In [12]:
# Log directly into the local SQLite backend store (the same database file the
# `mlflow server` command in this notebook is pointed at).
db_uri =  "sqlite:///mlrunsdb.db"
mlflow.set_tracking_uri(db_uri)

# Experiment-level metadata. MLflow tags are string-valued, so every value is
# given as an explicit string: previously "Nomes" was a Python list and would
# have been stored as its repr, and "Turma" was an int.
tags = {
    "Módulo": "Modelos Produtivos",
    "Nomes": "Leonardo Guelfi, Marcelo Yonei",
    "Turma": "815",
    "Projeto": "Carros"
}

# Select the experiment (MLflow creates it if it does not exist yet, as the
# INFO log below this cell shows) and attach the tags to it.
mlflow.set_experiment(experiment_name="Classificação de Consumo de combustível")
mlflow.set_experiment_tags(tags)

2022/11/13 10:08:09 INFO mlflow.tracking.fluent: Experiment with name 'Classificação de Consumo de combustível' does not exist. Creating a new experiment.


In [22]:
def get_metrics(y_test:list, y_pred:list) -> list:
    """Compute the four classification metrics logged for a run.

    Args:
        y_test: True class labels.
        y_pred: Predicted class labels, aligned with ``y_test``.

    Returns:
        ``[accuracy, precision, recall, f1]`` in that order. Precision,
        recall and F1 are computed with ``average='weighted'``.
    """
    accuracy = accuracy_score(y_test, y_pred)
    # The remaining three scorers share the same call signature and averaging.
    weighted_scores = [
        scorer(y_test, y_pred, average='weighted')
        for scorer in (precision_score, recall_score, f1_score)
    ]
    return [accuracy, *weighted_scores]

In [23]:
# Hyperparameters are declared once so the fitted estimator and the values
# logged to MLflow cannot drift apart (they used to be written out twice).
LOGIT_C = 0.5
LOGIT_PENALTY = 'l2'

# Train, evaluate and log one logistic-regression run to the active experiment.
with mlflow.start_run(
    run_name='API',
    description='Projeto Carros com logit',
) as model_run:

    # Standardise the features, then fit the logistic regression on them.
    model = Pipeline([
        ('scaler', StandardScaler()),
        ('logit', LogisticRegression(C=LOGIT_C, penalty=LOGIT_PENALTY)),
    ])
    model.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = model.predict(X_test)
    ac, pr, rc, f1 = get_metrics(y_test, y_pred)

    params = {
        "c": LOGIT_C,
        "penalty": LOGIT_PENALTY,
        "features": len(X_train.columns),
        "size_train_dataset": len(X_train),
        "size_test_dataset": len(X_test)}

    metrics = {
        "acuracia": ac,
        "precision": pr,
        "recall": rc,
        "f1": f1}

    mlflow.log_params(params=params)
    mlflow.log_metrics(metrics=metrics)

    # Store the fitted pipeline as a run artifact under "model".
    # NOTE(review): a later cell loads 'models:/carros_logit/Production', but
    # this call only logs the artifact; registering the model and promoting it
    # to Production presumably happens outside this notebook — confirm.
    mlflow.sklearn.log_model(model, "model")

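The next cells read the registered model through the tracking server at `http://localhost:5000/`, so make sure the server is running:

```bash
mlflow server --backend-store-uri sqlite:///mlrunsdb.db --default-artifact-root ./mlflowruns --host 0.0.0.0 --port 5000
```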

In [24]:
# Persist the fitted pipeline to disk with joblib (imported in the top import
# cell rather than mid-notebook, so a fresh "Run All" shows every dependency
# up front).
# NOTE(review): the '.pk1' extension (digit one) looks like a typo for '.pkl';
# left unchanged because other code may load this exact file name — confirm.
file_name = "logit_model_v1.pk1"

joblib.dump(model, file_name)

['logit_model_v1.pk1']

In [25]:
# Registry URI of the model to serve: name 'carros_logit', stage 'Production'.
PATH = 'models:/carros_logit/Production'

# Switch from the direct SQLite URI to the HTTP tracking server, then fetch
# the model through it.
mlflow.set_tracking_uri(uri='http://localhost:5000/')
loaded_model = mlflow.sklearn.load_model(PATH)

# Predict on the held-out features with the loaded model.
loaded_model.predict(X_test)

array([0, 0, 0, 2, 2, 1, 0, 2], dtype=int64)

In [26]:
# Score the registry-loaded model on the held-out test split (output below).
loaded_model.score(X_test,y_test)

0.75