In [4]:
def describe(data):
    """Print the number of rows in ``data`` and return its summary statistics.

    Args:
        data: Object exposing ``shape`` and ``describe()`` (for example a
            pandas DataFrame).

    Returns:
        The result of ``data.describe()`` rounded to 2 decimal places.
    """
    row_count = data.shape[0]
    print("Total de linhas: {}".format(row_count))
    summary = data.describe()
    return summary.round(2)

def quantificarDadosFaltantes(data):
    """Report, per column, how much data is missing as a non-positive percentage.

    The value for each column is the percentage of non-null entries minus
    100, so ``0.0`` means nothing is missing and ``-25.0`` means a quarter
    of the rows are null.

    Args:
        data: Object exposing ``count(axis=0)`` and ``shape`` (for example a
            pandas DataFrame).

    Returns:
        The per-column values described above, rounded to 1 decimal place.
    """
    total_rows = data.shape[0]
    filled_ratio = data.count(axis=0) / total_rows
    filled_percent = filled_ratio * 100
    return (filled_percent - 100).round(1)

def exibirFaixaDinamicaDasVariaveis(dadosNumericos):
    """Build a table with the minimum, maximum and range of every column.

    Args:
        dadosNumericos: DataFrame whose columns support ``min``/``max`` and
            subtraction.

    Returns:
        A DataFrame indexed by the input column labels with the columns
        ``Min``, ``Max`` and ``Range`` (``Max - Min``), rounded to 2 decimal
        places.
    """
    import pandas as pd
    lowest = dadosNumericos.min()
    highest = dadosNumericos.max()
    # ``keys`` labels the two concatenated Series as the result's columns.
    span = pd.concat([lowest, highest], axis=1, keys=['Min', 'Max'])
    span['Range'] = span['Max'] - span['Min']
    return span.round(2)

def exibirHistogramaComKdeParaCadaVariavel(dadosNumericos):
    """Open one 5x3 figure per column and draw its distribution with seaborn.

    Each figure is titled with the column label and filled by
    ``seaborn.distplot`` called with its default options.

    NOTE(review): ``distplot`` is marked deprecated in the seaborn
    documentation; confirm the installed seaborn version still provides it.

    Args:
        dadosNumericos: DataFrame whose columns are plotted one by one.
    """
    import matplotlib.pyplot as plt
    import seaborn as sb
    for column_name in dadosNumericos.columns:
        plt.figure(figsize=[5, 3])
        plt.title(column_name)
        values = dadosNumericos[column_name]
        sb.distplot(values)

def exibirHistogramaComKdeParaCadaVariavel2(label1, dadosNumericos1, label2, dadosNumericos2):
    """Overlay the distributions of two datasets, one 5x3 figure per column.

    The columns iterated are those of ``dadosNumericos1``; the same label is
    looked up in ``dadosNumericos2``. Both are drawn with
    ``seaborn.distplot`` on the same figure and a legend is added.

    If a ``TypeError`` is raised while handling a column, the column label
    is printed and the loop moves on to the next column.

    Args:
        label1: Legend label for the first dataset.
        dadosNumericos1: First DataFrame; defines which columns are plotted.
        label2: Legend label for the second dataset.
        dadosNumericos2: Second DataFrame, indexed with the same column
            labels.
    """
    import matplotlib.pyplot as plt
    import seaborn as sb
    labelled_frames = ((dadosNumericos1, label1), (dadosNumericos2, label2))
    for column_name in dadosNumericos1.columns:
        try:
            plt.figure(figsize=[5, 3])
            plt.title(column_name)
            for frame, legend_label in labelled_frames:
                sb.distplot(frame[column_name], label=legend_label)
            plt.legend()
        except TypeError:
            print(column_name)

def exibirHistogramaComKdeParaCadaVariavelComAgrupamento(variavelParaAgrupamento, dadosNumericos):
    """Plot, for every other column, one KDE curve per group on a 4x4 figure.

    Rows of ``dadosNumericos`` are grouped by ``variavelParaAgrupamento``;
    every remaining column gets its own figure, titled with the column
    label, where each group's KDE is drawn with a legend.

    Args:
        variavelParaAgrupamento: Label of the column used to form the groups.
        dadosNumericos: DataFrame containing the grouping column and the
            columns to plot. It is not modified.
    """
    import matplotlib.pyplot as plt
    # Only the column labels of this reduced frame are used below.
    plotted = dadosNumericos.drop(columns=[variavelParaAgrupamento])
    groups = dadosNumericos.groupby([variavelParaAgrupamento])
    for column_name in plotted.columns:
        plt.figure(figsize=[4, 4])
        plt.title(column_name)
        groups[column_name].plot.kde(legend=True)

def exibirGraficoCoeficientesDeCorrelacao(variavelAlvo, data):
    """Draw a bar chart of every column's correlation with ``variavelAlvo``.

    The correlations come from ``data.corr()`` with its default settings;
    the target's own entry is dropped before plotting on a new 10x5 figure.

    Args:
        variavelAlvo: Label of the target column.
        data: DataFrame containing the target and the other columns.
    """
    import matplotlib.pyplot as plt
    plt.figure(figsize=[10, 5])
    target_corr = data.corr()[variavelAlvo]
    other_columns_corr = target_corr.drop(variavelAlvo)
    other_columns_corr.plot.bar()

def exibirScatterMatrix(data):
    """Draw a pandas scatter matrix of ``data``.

    Side effect: the matplotlib default figure size is set to 15x15 through
    the global rc settings, so it also applies to figures created afterwards
    without an explicit size.

    Args:
        data: DataFrame passed to ``pandas.plotting.scatter_matrix``.

    Returns:
        Whatever ``pandas.plotting.scatter_matrix`` returns.
    """
    import matplotlib.pyplot as plt
    import pandas as pd
    plt.rcParams['figure.figsize'] = [15, 15]
    axes = pd.plotting.scatter_matrix(data)
    return axes

def exibirBoxplotPorVariavel(dadosNumericos):
    """Draw one boxplot per column, each on its own 7x3 figure.

    Args:
        dadosNumericos: DataFrame whose columns are plotted one by one; each
            figure is titled with the column label.
    """
    import matplotlib.pyplot as plt
    for column_name in dadosNumericos.columns:
        plt.figure(figsize=[7, 3])
        plt.title(column_name)
        values = dadosNumericos[column_name]
        plt.boxplot(values)

def removerOutlier(data, nomesDasColunasNumericas):
    """Clamp the listed columns to their 10th and 90th percentiles.

    Despite the name, no rows are removed: values below the 0.10 quantile
    are replaced by that quantile and values above the 0.90 quantile are
    replaced by that quantile.

    Args:
        data: DataFrame to process. It is modified in place.
        nomesDasColunasNumericas: Iterable of column labels to clamp.

    Returns:
        The same ``data`` object that was passed in.
    """
    import numpy as np
    for column_name in nomesDasColunasNumericas:
        # Both limits come from the untouched column, before any replacement.
        lower, upper = np.quantile(data[column_name], [0.10, 0.90])
        too_low = data[column_name] < lower
        data.loc[too_low, column_name] = lower
        too_high = data[column_name] > upper
        data.loc[too_high, column_name] = upper
    return data
        
def normalizarPorMinMax(dadosNumericos):
    """Rescale every column with scikit-learn's ``MinMaxScaler``.

    The scaler is created with its default settings and fitted on the same
    data it transforms.

    Args:
        dadosNumericos: DataFrame of values to rescale.

    Returns:
        A new DataFrame with the input's column labels. No index is passed
        to the DataFrame constructor, so the input's row labels are not
        carried over.
    """
    import pandas as pd
    from sklearn import preprocessing
    fitted_scaler = preprocessing.MinMaxScaler().fit(dadosNumericos)
    scaled_values = fitted_scaler.transform(dadosNumericos)
    return pd.DataFrame(scaled_values, columns=dadosNumericos.columns)

def normalizarPorStandardScaler(dadosNumericos):
    """Standardize every column with scikit-learn's ``StandardScaler``.

    The scaler is created with its default settings and fitted on the same
    data it transforms.

    Args:
        dadosNumericos: DataFrame of values to standardize.

    Returns:
        A new DataFrame with the input's column labels. No index is passed
        to the DataFrame constructor, so the input's row labels are not
        carried over.
    """
    import pandas as pd
    from sklearn import preprocessing
    fitted_scaler = preprocessing.StandardScaler().fit(dadosNumericos)
    scaled_values = fitted_scaler.transform(dadosNumericos)
    return pd.DataFrame(scaled_values, columns=dadosNumericos.columns)

def normalizarPorLog(dadosNumericos):
    """Apply ``log(1 + x)`` to every column of a copy of the input.

    Args:
        dadosNumericos: DataFrame of numeric values. It is not modified.

    Returns:
        A copy of the input in which each column has been replaced by
        ``numpy.log1p`` of its values.
    """
    import numpy as np
    transformed = dadosNumericos.copy()
    # Iterating a DataFrame yields its column labels.
    for column_name in transformed:
        column = transformed[column_name]
        transformed[column_name] = np.log1p(column)
    return transformed

def criarFeatures(data, categoricalVariableName):
    """Replace a categorical column with its one-hot (dummy) columns.

    Args:
        data: DataFrame containing the categorical column. It is not
            modified.
        categoricalVariableName: Label of the column to encode.

    Returns:
        A new DataFrame holding every column of ``data`` except
        ``categoricalVariableName``, followed by the dummy columns that
        ``pandas.get_dummies`` produces for it.
    """
    import pandas as pd
    # pandas is only bound to the ``pd`` alias in this scope, so every call
    # must go through it; the bare name ``pandas`` is not defined here.
    cat_data = pd.get_dummies(data[categoricalVariableName])
    remaining = data.drop(categoricalVariableName, axis=1)
    new_data = pd.concat((remaining, cat_data), axis=1, sort=False)
    return new_data

def treinar_modelo(scaler, estimator, Xtrain, Ytrain):
    """Fit a two-step scikit-learn pipeline (scaler, then estimator).

    Args:
        scaler: Transformer used as the ``'Scaler'`` step.
        estimator: Final estimator used as the ``'Model'`` step.
        Xtrain: Training features passed to ``Pipeline.fit``.
        Ytrain: Training targets passed to ``Pipeline.fit``.

    Returns:
        The fitted ``sklearn.pipeline.Pipeline``.
    """
    from sklearn import pipeline
    steps = [('Scaler', scaler), ('Model', estimator)]
    fitted_pipeline = pipeline.Pipeline(steps)
    fitted_pipeline.fit(Xtrain, Ytrain)
    return fitted_pipeline

def treinar_modelo_com_grid_search(scaler, estimator, param_grid, Xtrain, Ytrain, cvfold):
    """Grid-search a scaler + estimator pipeline and return the fitted search.

    The pipeline steps are named ``'Scaler'`` and ``'Model'``.
    NOTE(review): keys in ``param_grid`` presumably need the ``Model__`` /
    ``Scaler__`` prefixes to reach the steps -- confirm against callers.

    Args:
        scaler: Transformer used as the ``'Scaler'`` step.
        estimator: Final estimator used as the ``'Model'`` step.
        param_grid: Parameter grid forwarded to ``GridSearchCV``.
        Xtrain: Training features.
        Ytrain: Training targets.
        cvfold: Value forwarded as ``GridSearchCV``'s ``cv`` argument.

    Returns:
        The fitted ``sklearn.model_selection.GridSearchCV`` object.
    """
    from sklearn import pipeline
    from sklearn import model_selection
    steps = [('Scaler', scaler), ('Model', estimator)]
    base_pipeline = pipeline.Pipeline(steps)
    # scoring=None leaves the choice of metric to GridSearchCV.
    search = model_selection.GridSearchCV(
        base_pipeline,
        param_grid,
        scoring=None,
        cv=cvfold,
    )
    search.fit(Xtrain, Ytrain)
    return search

def exibir_curva_de_aprendizado(descricaoModelo, estimator, scorer, Xtrain, Ytrain, cvfold):
    """Plot a learning curve for ``estimator`` on a new matplotlib figure.

    The drawing itself is delegated to ``plot_learning_curve``, imported
    from the ``functions`` notebook through the ``ipynb`` package.

    Args:
        descricaoModelo: Text appended to the ``'Curva de Aprendizado - '``
            figure title.
        estimator: Estimator forwarded to ``plot_learning_curve``.
        scorer: Value forwarded as the ``scoring`` argument.
        Xtrain: Training features.
        Ytrain: Training targets.
        cvfold: Splitter exposing ``split(X, y, groups=...)``; the splits it
            yields are forwarded as ``cv``.
    """
    import matplotlib.pyplot as plt
    from ipynb.fs.full.functions import plot_learning_curve

    title = 'Curva de Aprendizado - ' + descricaoModelo

    plt.figure()
    # NOTE(review): Ytrain is also passed as ``groups``; whether it has any
    # effect depends on the splitter type -- confirm this is intended.
    plot_learning_curve(estimator, title, Xtrain, Ytrain, ax=plt.gca(),
                        cv=cvfold.split(Xtrain, Ytrain, groups=Ytrain),
                        scoring=scorer, n_jobs=3)

def exibirCurvaRoc(title, Ytest, Yhat):
    """Plot a ROC curve on a new figure, with the AUC shown in the legend.

    Args:
        title: Figure title.
        Ytest: True labels, forwarded to ``roc_curve`` and ``roc_auc_score``.
        Yhat: Predicted scores, forwarded to the same two functions.
    """
    import matplotlib.pyplot as plt
    from sklearn import metrics
    plt.figure()
    # The thresholds returned by roc_curve are not needed for the plot.
    false_alarm_rate, detection_rate, _ = metrics.roc_curve(Ytest, Yhat)
    area = metrics.roc_auc_score(Ytest, Yhat)
    legend_text = 'AUC: %.2f' % (area)
    plt.plot(false_alarm_rate, detection_rate, '-', lw=2, label=legend_text)
    plt.legend()
    plt.title(title)
    plt.grid()
    plt.xlabel('Taxa de Falso Alarme')
    plt.ylabel('Taxa de Detecção')

def exibirMatrixDeConfusao(Ytest, Ypred, classes):
    """Build a labelled confusion matrix with row and column totals.

    Args:
        Ytest: True labels.
        Ypred: Predicted labels.
        classes: Iterable of class labels used to name the rows
            (``'Real <label>'``) and columns (``'Model <label>'``). Labels
            of any type are accepted; they are formatted into the names.
            NOTE(review): ``confusion_matrix`` is called without ``labels``,
            so ``classes`` must follow scikit-learn's default label order
            and count -- confirm against callers.

    Returns:
        An integer DataFrame holding the confusion matrix plus a
        ``'Real Total'`` column (row sums) and a ``'Model Total'`` row
        (column sums).
    """
    from sklearn import metrics
    import pandas as pd

    # Formatting instead of ``'Model ' + s`` so that non-string class labels
    # (e.g. integers) do not raise TypeError.
    col_names = ['Model {}'.format(s) for s in classes]
    idx_names = ['Real {}'.format(s) for s in classes]

    cmat = metrics.confusion_matrix(Ytest, Ypred)
    cmat = pd.DataFrame(cmat, index=idx_names, columns=col_names)
    cmat['Real Total'] = cmat.sum(axis=1)
    cmat.loc['Model Total', :] = cmat.sum(axis=0)
    # Cast back to int in case appending the totals row changed the dtype.
    cmat = cmat.astype(int)
    return cmat

def mape(y_true, y_pred):
    """Return the mean absolute percentage error between targets and predictions.

    Args:
        y_true: Array-like of true values supporting element-wise arithmetic
            and ``.mean()`` (for example a numpy array or pandas Series).
        y_pred: Predictions, compatible with ``y_true`` for element-wise
            arithmetic.

    Returns:
        The mean of ``|y_pred - y_true| / |y_true|`` multiplied by 100.
        Zeros in ``y_true`` are not guarded against.
    """
    # The denominator must be the magnitude of the target; dividing by the
    # signed value makes negative targets contribute negative "errors" that
    # cancel the positive ones.
    return (abs(y_pred - y_true) / abs(y_true)).mean() * 100
