In [23]:
%load_ext autoreload
%autoreload 2

import mlflow
import pickle
import pandas as pd 
import numpy as np

from datetime import datetime
from functions import get_metrics

from matplotlib import pyplot as plt 
import seaborn as sns

from sklearn.neural_network import MLPClassifier

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Configs

In [24]:
# Name of the MLflow experiment under which this notebook's runs are grouped.
EXPERIMENT_TITLE = "Titanic Dataset Analyzes"

# Markdown note attached to the run through the "mlflow.note.content" tag.
run_description = """
### Descrição

Implementação usando [MLPClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html)
"""

# Tags attached to the run; also the single place where the data paths live.
tags = {
    "date": datetime.now(),
    "author": "Pablo Veinberg",
    "version": 1.0,
    # NOTE: runs logged before this fix carry the misspelled key "envoronment".
    "environment": "local",
    "mlflow.note.content": run_description,
    "mlflow.runName": "MLPClassifier",
    "data_source": "./../datasets/silver/train-encoded-not-normalize.parquet",
    "train_test_dataset": "./../datasets/silver/titanic-train-test-data.pkl",
}

# Values logged as run parameters; the model hyper-parameters are read from here.
params = {
    # Random per-run identifier; it is embedded in the confusion-matrix file name.
    "token": np.random.randint(10_000, high=99_000),
    "max_iter": 10_000,
    "tol": 0.000010,
    "verbose": True,
    "solver": "adam",
    "activation": "relu",
    # Placeholder: overwritten in the training cell once the feature count is known.
    "hidden_layer_sizes": (),
}


## Load data

In [25]:
# Read the parquet file named in the run tags and wrap it as an MLflow dataset
# so it can be attached to the run as an input.
dataset = pd.read_parquet(tags["data_source"])
mlflow_dataset = mlflow.data.from_pandas(
    dataset,
    source=tags["data_source"],
    name="Titanic Dataset",
)

  return _dataset_source_registry.resolve(


In [26]:
# Load the pre-computed train/test split from the path recorded in the run
# tags, so the logged "train_test_dataset" tag always matches the file used.
# NOTE(security): pickle.load can execute arbitrary code; only load files
# produced by this project.
with open(tags['train_test_dataset'], 'rb') as file:
    X_train, X_test, y_train, y_test = pickle.load(file)

# Displayed as the cell output: quick sanity check of the split shapes.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((712, 8), (179, 8), (712,), (179,))

## Train the model and log the run

In [27]:
mlflow.set_experiment(EXPERIMENT_TITLE)

# The context manager ends the run when the block exits (including on error),
# so no explicit mlflow.end_run() call is needed afterwards.
with mlflow.start_run():

    # Two hidden layers of equal width derived from the number of features:
    # ((n_features + 1) // 2 + 1) * 10.
    n_features = X_train.shape[1]
    hidden_size = ((n_features + 1) // 2 + 1) * 10
    params['hidden_layer_sizes'] = (hidden_size, hidden_size)

    # Record the run configuration before training so that a run which fails
    # during fit/evaluation still carries its dataset, tags and parameters.
    mlflow.log_input(mlflow_dataset)
    mlflow.set_tags(tags)
    mlflow.log_params(params)

    model = MLPClassifier(
        max_iter=params['max_iter'],
        tol=params['tol'],
        verbose=params['verbose'],
        solver=params['solver'],
        activation=params['activation'],
        hidden_layer_sizes=params['hidden_layer_sizes'],
        # Seed weight initialisation with the logged token so the run can be
        # reproduced from its recorded parameters.
        random_state=params['token'],
    )

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # get_metrics is a project helper; presumably it saves the confusion-matrix
    # plot to the given path (the file is logged as an artifact right below) and
    # returns a dict of metric values -- confirm in functions.get_metrics.
    plot_confusion_matrix_path = f"./../results/mlp_classifier_neural_network_{params['token']}.png"
    metrics = get_metrics(
        y_test,
        y_pred,
        plot_confusion_matrix_path,
        "Matrix Confusion - Titanic Dataset",
    )

    mlflow.log_metrics(metrics)
    mlflow.log_artifact(plot_confusion_matrix_path)

Traceback (most recent call last):
  File "/home/pablo_veinberg/.local/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 302, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/home/pablo_veinberg/.local/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 395, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/home/pablo_veinberg/.local/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1303, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/home/pablo_veinberg/.local/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1296, in _read_helper
    result = read_yaml(root, file_name)
  File "/home/pablo_veinberg/.local/lib/python3.10/site-packages/mlflow/utils/file_utils.py", line 303, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.e

Iteration 1, loss = 0.86456357
Iteration 2, loss = 0.78011707
Iteration 3, loss = 0.71585736
Iteration 4, loss = 0.66182424


Iteration 5, loss = 0.61934908
Iteration 6, loss = 0.58285618
Iteration 7, loss = 0.55313258
Iteration 8, loss = 0.52491293
Iteration 9, loss = 0.50280020
Iteration 10, loss = 0.48254357
Iteration 11, loss = 0.46549806
Iteration 12, loss = 0.45107644
Iteration 13, loss = 0.43843630
Iteration 14, loss = 0.42808118
Iteration 15, loss = 0.41948861
Iteration 16, loss = 0.41235382
Iteration 17, loss = 0.40654546
Iteration 18, loss = 0.40139314
Iteration 19, loss = 0.39641528
Iteration 20, loss = 0.39222420
Iteration 21, loss = 0.38810298
Iteration 22, loss = 0.38367789
Iteration 23, loss = 0.37966668
Iteration 24, loss = 0.37549637
Iteration 25, loss = 0.37173780
Iteration 26, loss = 0.36785032
Iteration 27, loss = 0.36415074
Iteration 28, loss = 0.36043246
Iteration 29, loss = 0.35666013
Iteration 30, loss = 0.35311722
Iteration 31, loss = 0.34931977
Iteration 32, loss = 0.34582407
Iteration 33, loss = 0.34207118
Iteration 34, loss = 0.33865222
Iteration 35, loss = 0.33460611
Iteration 36,

  return _infer_schema(self._df)
