In [1]:
import fastai
fastai.__version__

'2.2.7'

In [2]:
from io import StringIO
import os
import shutil

import argparse
import csv
import json
import joblib
import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Binarizer, StandardScaler, OneHotEncoder


In [3]:
from sklearn.linear_model import LogisticRegression

In [4]:
from fastai.tabular.all import *
from fastai.vision.all import *

  return torch._C._cuda_getDeviceCount() > 0


In [5]:
feature_columns_names = [
       'credito_coaplicante',
	   'credito_fiador',
       'financiamento_outro_lugar',
	   'tipo_emprego', 
	   'conta_corrente_valor',
       'outros_creditos_aqui',
	   'finance__credits__other_banks',
       'numero_dependentes',
	   'investimentos_valor',
	   'duracao_residencia',
       'valor_solicitado',
	   'tem_telefone', 
	   'duracao_emprego',
       'duracao_credito',
	   'historico_credito',
	   'financiamento_outros_bens',
       'proposito_credito',
	   'tipo_residencia',
	   'receita_disponivel']




feature_columns_dtype = {
       'credito_coaplicante'            : "float64",
	   'credito_fiador'                 : "float64", 
       'financiamento_outro_lugar'      : "float64",
	   'tipo_emprego'                   : "category" ,
	   'conta_corrente_valor'           : "category" ,
       'outros_creditos_aqui'           : "float64",
	   'finance__credits__other_banks'  : "float64",
       'numero_dependentes'             : "float64",
	   'investimentos_valor'            : "category" ,
	   'duracao_residencia'             : "category" ,
       'valor_solicitado'               : "float64",
	   'tem_telefone'                   : "category" ,
	   'duracao_emprego'                : "float64",
       'duracao_credito'                : "float64",
	   'historico_credito'              : "category" ,
	   'financiamento_outros_bens'      : "category" ,
       'proposito_credito'              : "category" ,
	   'tipo_residencia'                : "category" ,
	   'receita_disponivel'             : "float64"
}

label_column = 'deu_default'
label_column_dtype = { 'deu_default': "category"} 


In [6]:
def merge_two_dicts(x, y):
    z = x.copy()   # start with x's keys and values
    z.update(y)    # modifies z with y's keys and values & returns None
    return z

In [7]:
file = 'credito_limpo.csv'
input_files = ['credito_limpo.csv' ]
model_path = 'models'

In [52]:
###  label é a primeira coluna no arquivo de entrada    
raw_data = [ pd.read_csv(
        file, 
        header=None, 
        names= feature_columns_names + [label_column],
        dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype)) for file in  input_files ]
concat_data = pd.concat(raw_data)
y = concat_data[label_column]

# Labels should not be preprocessed. predict_fn will reinsert the labels after featurizing.
concat_data.drop(label_column, axis=1, inplace=True)


category_features = ['tipo_emprego', 'conta_corrente_valor', 'investimentos_valor',                  
                     'tem_telefone', 'historico_credito', 'financiamento_outros_bens',                        'proposito_credito',
                     'tipo_residencia', 'tipo_residencia'
                    ]

numeric_features = [ 'credito_coaplicante' ,'credito_fiador',                                               'financiamento_outro_lugar' , 'outros_creditos_aqui',  
                    'finance__credits__other_banks' , 'numero_dependentes' ,                                'valor_solicitado' ,               
                    'duracao_emprego' ,  'duracao_residencia',  'duracao_credito'  ]                

numeric_transformer = make_pipeline(
        SimpleImputer(strategy='median'),   ## replace NaN value
        StandardScaler())                   ## remove mean, and stddev =1

categorical_transformer = make_pipeline(
        SimpleImputer(strategy='constant', fill_value='missing'),    ## replace NaN                                                                          value
        OneHotEncoder())                                      ##  nao tem OrdinalEncoder())

preprocessor = ColumnTransformer(transformers=[
             ("num", numeric_transformer, numeric_features),
             ("cat", categorical_transformer, category_features)
              ])
    
preprocessor.fit(concat_data)

joblib.dump(preprocessor, os.path.join(model_path, "model.joblib"))

print("saved model!")
    

saved model!


In [53]:
concat_data.head()

Unnamed: 0,credito_coaplicante,credito_fiador,financiamento_outro_lugar,tipo_emprego,conta_corrente_valor,outros_creditos_aqui,finance__credits__other_banks,numero_dependentes,investimentos_valor,duracao_residencia,valor_solicitado,tem_telefone,duracao_emprego,duracao_credito,historico_credito,financiamento_outros_bens,proposito_credito,tipo_residencia,receita_disponivel
0,0.0,0.0,0.0,operario,sem conta,2.0,1.0,1.0,muito_alta,2,1500.0,Nao,2.0,6.0,muito pobre,imovel,carro novo,proprio,2.0
1,0.0,0.0,0.0,operario,baixa,1.0,0.0,1.0,baixa,4,1656.0,Nao,2.0,9.0,boa,imovel,carro novo,aluguel,4.0
2,0.0,0.0,0.0,nivel medio,negativa,2.0,0.0,1.0,alta,4,2028.0,Nao,15.0,6.0,muito pobre,carro ou outro,eletronico,proprio,4.0
3,0.0,0.0,0.0,operario,negativa,1.0,2.0,1.0,baixa,1,2034.0,Nao,12.0,12.0,boa,carro ou outro,requalificacao,proprio,4.0
4,0.0,0.0,0.0,nivel medio,negativa,1.0,0.0,1.0,baixa,1,2058.0,Nao,1.0,6.0,boa,imovel,eletrodomestico,proprio,4.0


In [28]:
preprocessor.transform(concat_data.iloc[:1,:])

array([[-0.20676767, -0.23420572, -0.20926161,  1.02707891,  1.40836493,
        -0.42828957, -1.07086487, -0.73002164, -1.23647786,  0.        ,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
         1.        ,  0.        ,  0.        ,  1.        ,  0.        ,
         0.        ,  1.        ,  0.        ,  0.        ,  1.        ,
         0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
         1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         1.        ]])

In [54]:
# Preprocess training input
preprocessed_train = preprocessor.transform(concat_data)
ll = LogisticRegression()
ll.fit(preprocessed_train, y)
#y_pred = ll.predict(preprocessed_train)
ll.score(preprocessed_train, y)

0.779

# Serial Inference Pipeline with Scikit preprocessor and Linear Learner <a class="anchor" id="serial_inference"></a>


## Set up the inference pipeline <a class="anchor" id="pipeline_setup"></a>
Setting up a Machine Learning pipeline can be done with the Pipeline Model. This sets up a list of models in a single endpoint; in this example, we configure our pipeline model with the fitted Scikit-learn inference model and the fitted Linear Learner model. Deploying the model follows the same ```deploy``` pattern in the SDK.

In [30]:
from sklearn.pipeline import Pipeline
pipe = Pipeline([('preprocessing', preprocessor), ('regression', ll)])

In [31]:
pipe.fit(concat_data, y)

Pipeline(steps=[('preprocessing',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('simpleimputer',
                                                                   SimpleImputer(strategy='median')),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['credito_coaplicante',
                                                   'credito_fiador',
                                                   'financiamento_outro_lugar',
                                                   'outros_creditos_aqui',
                                                   'finance__credits__other_banks',
                                                   'numero_dependentes',
                                                   'valor_solicitado',
                        

In [32]:
pipe.score(concat_data,y)

0.776

In [56]:
%%writefile credito_service.py
import pandas as pd
import json
import bentoml
from bentoml.frameworks.sklearn import SklearnModelArtifact
#from bentoml.service.artifacts.common import PickleArtifact
#from bentoml.handlers import DataframeHandler
from bentoml.adapters import DataframeInput, FileInput

@bentoml.artifacts([
                    SklearnModelArtifact("model_a"),
                    SklearnModelArtifact("ml")
                    ])
@bentoml.env(pip_packages=["scikit-learn", "pandas"])
class CreditPrediction(bentoml.BentoService):

    @bentoml.api(input=DataframeInput(), batch=True)
    def predict(self, df):
        """
        predict expects pandas.Series as input
        """  
        # print("***********")
        # print("df", df.iloc[0,:].T)
        # print("***********")
        #print("colunas", df.columns)
        df = self.artifacts.model_a.transform(df)
        print(df)
        return self.artifacts.ml.predict(df)

Overwriting credito_service.py


In [57]:
from credito_service import CreditPrediction

svc = CreditPrediction()
svc.pack('model_a', preprocessor)
svc.pack('ml', ll)
saved_path =svc.save()





[2021-03-16 09:39:37,956] INFO - BentoService bundle 'CreditPrediction:20210316093936_A4D92E' saved to: /home/rubens/bentoml/repository/CreditPrediction/20210316093936_A4D92E


In [58]:
!bentoml serve CreditPrediction:latest

[2021-03-16 09:39:43,699] INFO - Getting latest version CreditPrediction:20210316093936_A4D92E
[2021-03-16 09:39:43,700] INFO - Starting BentoML API server in development mode..
 * Serving Flask app "CreditPrediction" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off
 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
***********
df credito_coaplicante                      0.0
credito_fiador                           0.0
financiamento_outro_lugar                0.0
tipo_emprego                     nivel medio
conta_corrente_valor               sem conta
outros_creditos_aqui                     2.0
finance__credits__other_banks            1.0
numero_dependentes                       1.0
investimentos_valor               muito_alta
duracao_residencia                         2
valor_solicitado                      1500.0
tem_telefone                             Nao
duracao_emprego                          2.0
duracao_cred

In [9]:
!bentoml ec2 deploy credito-bentoml -b CreditPrediction:20210316111837_2A5E06


Error: [31mec2 deploy failed: INTERNAL:Error executing docker command: 
The command 'docker' could not be found in this WSL 2 distro.
We recommend to activate the WSL integration in Docker Desktop settings.

See https://docs.docker.com/docker-for-windows/wsl/ for details.

[0m


In [9]:
#!bentoml deployment delete credito-bentoml

Error: [31mdeployment delete failed: NOT_FOUND:Deployment "credito-bentoml" in namespace "dev" not found[0m


In [11]:
#!bentoml deployment list

[39mNAME                           NAMESPACE    PLATFORM       BENTO_SERVICE                             STATUS    AGE
bentoml-fastai                 dev          aws-ec2        FastaiTabularModel:20210303170358_692543  running   1 week and 5 days
bentoml-deployment             dev          aws-ec2        IrisClassifier:20210301145955_642469      running   1 week and 6 days
my-first-sagemaker-deployment  dev          aws-sagemaker  IrisClassifier:20210301145955_642469      error     2 weeks and 20 hours[0m


In [12]:
#!bentoml list

[39mBENTO_SERVICE                           AGE                           APIS                                   ARTIFACTS                                                LABELS
CreditPrediction:20210316111837_2A5E06  22 minutes and 54.34 seconds  predict<DataframeInput:DefaultOutput>  model_a<SklearnModelArtifact>, ml<SklearnModelArtifact>
CreditPrediction:20210316093936_A4D92E  2 hours and 1 minute          predict<DataframeInput:DefaultOutput>  model_a<SklearnModelArtifact>, ml<SklearnModelArtifact>
CreditPrediction:20210316092104_E6F16C  2 hours and 20 minutes        predict<DataframeInput:DefaultOutput>  model_a<SklearnModelArtifact>, ml<SklearnModelArtifact>
CreditPrediction:20210316090718_F9423B  2 hours and 34 minutes        predict<DataframeInput:DefaultOutput>  model_a<SklearnModelArtifact>, ml<SklearnModelArtifact>
CreditPrediction:20210316090359_5F4A40  2 hours and 37 minutes        predict<DataframeInput:DefaultOutput>  model_a<SklearnModelArtifact>, ml<SklearnModelArtifac

In [None]:
## ferramentas
# import json
# dic1 = pd.read_("input1.csv")
# d2 = json.load(open("input1.csv"))
# pd.DataFrame([d2])
# concat_data.iloc[0].to_json("input1.csv")