In [1]:
import os
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, KFold
import matplotlib.pyplot as plt
import seaborn as sns
import pickle as pkl

In [2]:
data = pd.read_csv('../data/processed/clf/data.csv')

In [3]:
X, y = data.drop('Estado al egreso', axis=1), data['Estado al egreso']

In [4]:
X = StandardScaler().fit_transform(X)

In [5]:
params = {
    'hidden_layer_sizes': [(5, 10, 5), (10, 10, 10)],
    'activation': ['tanh', 'relu', 'logistic'],
    'solver': ['sgd', 'adam'],
    'learning_rate': ['constant','adaptive'],
    'max_iter': [500, 1000],
    'learning_rate_init': [0.2, 0.3, 0.1],
    'alpha': [0.0001, 0.001, 0.01],    
}

In [6]:
clf = MLPClassifier()

In [7]:
os.chdir('../')

In [9]:
with mlflow.start_run():
    
    grid = GridSearchCV(estimator=clf, param_grid=params, scoring='f1', cv=5)
    grid.fit(X, y)

    mlflow.log_param('model_type', 'neural-network')
    for param, value in grid.best_params_.items():
        mlflow.log_param(param, value)
    
    mlflow.log_metric("best_score", grid.best_score_)
    mlflow.sklearn.log_model(grid.best_estimator_, 'MLPClassifier')

    print('Best score: ', grid.best_score_)
    print('Best params: ', grid.best_params_)



Best score:  0.9407563025210084
Best params:  {'activation': 'logistic', 'alpha': 0.001, 'hidden_layer_sizes': (5, 10, 5), 'learning_rate': 'adaptive', 'learning_rate_init': 0.2, 'max_iter': 1000, 'solver': 'adam'}


In [11]:
with open('models/mlp/model.pkl', 'wb') as file:
    pkl.dump(grid.best_estimator_, file)