In [40]:
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta
from conector.mysql import mysql_engine
from sqlalchemy import create_engine
from pricing.service.scoring.base import BaseScoring
from werkzeug import exceptions
from scipy import stats
from pricing.utils import formata_cnpj
from datamanager import conn_pricing


class LScoring(BaseScoring):
    def __init__(self, data=None, cnpj=None, produto=None):
        """Store the scoring inputs and reset every intermediate metric.

        Args:
            data: optional payload dict. When given, its "id_produto" entry
                is the product and, if ``cnpj`` is None, its "dados" entry
                becomes the revenue records.
            cnpj: optional company document. When given, the revenue records
                are loaded through ``get_dados`` instead of ``data``.
            produto: product name, used only when ``data`` is None.
        """
        self.cnpj = cnpj

        if data is None:
            self.produto = produto
        else:
            self.produto = data.get("id_produto")

        # get_dados() reads self.produto, so the product is resolved first.
        if self.cnpj is None:
            self.params = data.get("dados")
        else:
            self.params = self.get_dados()

        # Intermediate results, filled in by the calculation methods.
        for nome in ("faturamentos", "razao_outlier", "data_max",
                     "estabilidade", "pesos", "volatilidade", "curva_score",
                     "score_crescimento", "prop_queda", "score_volatilidade",
                     "slope", "erro", "probabilidade_zeros", "zscore"):
            setattr(self, nome, None)
    
    def get_dados(self):
        """Load the revenue flow of ``self.cnpj`` for ``self.produto``.

        The flow table depends on the product; values are summed per date.

        Returns:
            list of ``{"data": datetime.date, "valor": number}`` records.

        Raises:
            ValueError: if ``self.produto`` has no flow query mapped to it.
        """
        from sqlalchemy import text

        # NOTE(review): database credentials are hard-coded in this URL;
        # they should be moved to configuration / a secret store.
        url_base = "mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/"
        schema = "credito-digital" if self.produto in ["tomatico", "padrao"] else "varejo"

        # The document is bound as a parameter instead of being formatted
        # into the SQL text.
        query_wirecard = "select cnpj, data, valor from fluxo_wirecard where cnpj=:cnpj"
        query_pv = "select cpf_cnpj as cnpj, data, valor from fluxo_pv where cpf_cnpj=:cnpj"
        query_tomatico = "select cnpj, dataFluxo as data, valorFluxo as valor from tb_Fluxo where cnpj=:cnpj"
        query_justa = "select cnpj, data, valor from fluxo_justa where cnpj=:cnpj"
        dict_query = {"tomatico": query_tomatico,
                      "padrao": query_tomatico,
                      "wirecard": query_wirecard,
                      "moip": query_wirecard,
                      "pagueveloz": query_pv,
                      "justa": query_justa}

        query = dict_query.get(self.produto)
        if query is None:
            raise ValueError("Unsupported product: {!r}".format(self.produto))

        # Only the pagueveloz table is queried with the formatted document.
        documento = formata_cnpj(self.cnpj) if self.produto == "pagueveloz" else self.cnpj

        engine = create_engine(url_base + schema)
        con = engine.connect()
        try:
            df = pd.read_sql(text(query), con, params={"cnpj": documento})
        finally:
            # Release the connection even when the query fails.
            con.close()

        df = df.groupby("data")["valor"].sum().reset_index()
        # Normalise both datetime and date values to plain dates.
        df["data"] = pd.to_datetime(df["data"]).dt.date
        return df[["data", "valor"]].to_dict("records")
    
    @classmethod
    def validar_dados(cls, data):
        """Validate the request payload.

        Raises:
            BadRequest: when ``data`` is None.
            UnprocessableEntity: when ``data['dados']`` is not a list.
        """
        if data is None:
            raise exceptions.BadRequest("Missing data")

        dados = data['dados']
        if isinstance(dados, list):
            return

        raise exceptions.UnprocessableEntity(
            "Field 'dados' should be an array")

    @staticmethod
    def gera_periodo(periods=12):
        """Return the first day of each of the ``periods`` months before the current one.

        The list is in ascending order and ends at the month preceding the
        current month.
        """
        hoje = datetime.now().date()
        primeiro_dia_atual = datetime(hoje.year, hoje.month, 1)
        inicio = primeiro_dia_atual - relativedelta(months=periods)
        meses = pd.date_range(start=inicio, periods=periods, freq='MS')
        return [mes.date() for mes in meses]

    @staticmethod
    def mensaliza(df):
        """Aggregate the frame into month-start buckets, summing the values.

        Note: the index of the frame passed in is replaced by its parsed
        "data" column as a side effect.

        Returns:
            a new frame with the month start in "data" and the summed columns.
        """
        df.index = pd.to_datetime(df["data"])
        mensal = df.resample('MS').sum()
        return mensal.reset_index()

    def isElegible(self):
        """Check that each of the last six full months has non-zero revenue.

        Returns:
            1 when all six months returned by ``gera_periodo(periods=6)`` are
            present with a non-zero value, None otherwise (including when
            there are no revenue records at all).
        """
        df = pd.DataFrame(self.params)
        if df.empty:
            # No records: not eligible (previously crashed in mensaliza).
            return None

        df = self.mensaliza(df)
        # Compare plain dates with plain dates, as gera_serie does, instead
        # of relying on isin() coercing dates to timestamps.
        df['data'] = df['data'].dt.date
        per = self.gera_periodo(periods=6)

        df = df[df['data'].isin(per)]
        if len(df) < 6 or (df['valor'] == 0).any():
            return None
        return 1

    def gera_serie(self, periods=12):
        """Build the monthly revenue series for the last ``periods`` months.

        Months between the first month with data (inside the window) and the
        end of the window that have no record are added with value 0. The
        result is stored in ``self.faturamentos`` sorted by date; an empty
        frame is stored when there is no data in the window.

        Args:
            periods: window size in months, ending at the previous month.
        """
        df = pd.DataFrame(self.params)
        if df.empty:
            # No records at all: keep the expected columns so callers can
            # still read "data"/"valor" from the empty frame.
            self.faturamentos = pd.DataFrame(columns=["data", "valor"])
            return

        df = self.mensaliza(df)
        df['data'] = df.data.dt.date
        periodo_completo = self.gera_periodo(periods=periods)
        df = df[df['data'].isin(periodo_completo)]
        if df.empty:
            self.faturamentos = df
            return

        data_min = df['data'].min()
        esperadas = pd.date_range(
            start=data_min, end=periodo_completo[-1], freq="MS")
        presentes = set(df['data'])
        faltantes = [el.date() for el in esperadas if el.date() not in presentes]

        # Add all missing months in a single concat instead of one concat
        # and one sort per missing month.
        if faltantes:
            df_extra = pd.DataFrame(
                {"data": faltantes, "valor": [0] * len(faltantes)})
            df = pd.concat([df, df_extra])
        df = df.sort_values("data").reset_index(drop=True)

        if self.faturamentos is None:
            self.faturamentos = df
        return

    def outlier_6meses(self):
        """Store the ratio between the mean revenue and the mean without the last month.

        The ratio is written to ``self.razao_outlier`` only if it is still None.
        """
        valores = self.faturamentos['valor']
        media_total = valores.mean()
        media_sem_ultimo = np.mean(valores.tolist()[:-1])
        razao = media_total/media_sem_ultimo
        if self.razao_outlier is None:
            self.razao_outlier = razao
        return

    def data_maxima(self):
        """Store the date that precedes the largest month-over-month increase.

        Each variation is keyed to the date of the month before it; when two
        variations are equal the later date wins. The result is written to
        ``self.data_max`` only if it is still None.
        """
        variacoes = list(self.faturamentos['valor'].diff())[1:]
        datas_anteriores = self.faturamentos['data'].tolist()[0:-1]
        data_por_variacao = {
            variacao: data
            for variacao, data in zip(variacoes, datas_anteriores)
        }
        maior_variacao = np.max(list(data_por_variacao.keys()))
        candidata = data_por_variacao.get(maior_variacao)
        if self.data_max is None:
            self.data_max = candidata
        return

    def crescimento_efetivo(self):
        """Measure how stable revenue is after ``self.data_max``.

        Stability is the standard deviation of the values after that date
        divided by the first of those values; it is written to
        ``self.estabilidade`` only if it is still None.
        """
        apos_pico = self.faturamentos['data'] > self.data_max
        valores = self.faturamentos[apos_pico]['valor']
        razao = valores.std()/valores.iloc[0]
        if self.estabilidade is None:
            self.estabilidade = razao
        return

    def calcula_pesos(self):
        """Build the weight assigned to each month (all but the last one).

        Weights start as 1, 2, 3, ... in date order. When
        ``self.estabilidade`` is at most 0.15, the month ``self.data_max``
        receives the maximum weight and the following months restart from
        the weight of the month before ``self.data_max`` (0 if absent),
        increasing by one per month. The mapping is written to
        ``self.pesos`` only if it is still None.
        """
        datas = self.faturamentos['data'].tolist()[:-1]
        dic_pesos = dict(zip(datas, range(1, self.faturamentos.shape[0])))

        if self.estabilidade <= 0.15:
            dic_pesos[self.data_max] = np.max(list(dic_pesos.values()))

            mes_anterior = self.data_max - relativedelta(months=1)
            p = dic_pesos.get(mes_anterior, 0)

            ultima_data = list(dic_pesos.keys())[-1]
            posteriores = pd.date_range(
                start=self.data_max + relativedelta(months=1),
                end=ultima_data, freq='MS')

            # Months after the peak are re-weighted as p + 1, p + 2, ...
            for i, mes in enumerate([el.date() for el in posteriores], start=1):
                dic_pesos[mes] = p + i

        if self.pesos is None:
            self.pesos = dic_pesos
        return

    def calcula_volatilidade(self):
        """Store the standard deviation of the revenue divided by its mean in ``self.volatilidade``."""
        valores = self.faturamentos['valor']
        desvio = valores.std()
        media = valores.mean()
        self.volatilidade = desvio/media
        return

    # growth score
    def lscore(self):
        """Return the weighted mean of the monthly variations, relative to the mean revenue.

        The weights come from ``self.pesos``; when ``self.razao_outlier`` is
        2 or more, the last variation gets weight 1.
        """
        pesos = list(self.pesos.values())
        if self.razao_outlier >= 2:
            pesos[-1] = 1

        variacoes = self.faturamentos[['valor']].diff().dropna()
        variacoes['pesos'] = pesos
        variacoes['tx'] = variacoes['valor'] * variacoes['pesos']

        media_ponderada = variacoes['tx'].sum() / variacoes['pesos'].sum()
        return media_ponderada/self.faturamentos['valor'].mean()

    def calibracao(self):
        """Load the score calibration table into ``self.curva_score``.

        Only the columns metrica, score, tipo_metrica and bandwidth are kept.
        """
        conexao = mysql_engine("apiPricing")
        tabela = pd.read_sql("select * from apiPricing.calibracao_score", conexao)
        colunas = ['metrica', 'score', 'tipo_metrica', 'bandwidth']
        self.curva_score = tabela[colunas]
        return

    def get_score(self, metrica, tipo_metrica):
        """Translate a metric into a score using the calibration curve.

        For 'lscore' a metric at or below the curve minimum scores 0 and at
        or above the maximum scores 1000; for any other metric type the
        direction is inverted. In between, the score is the mean of the
        curve scores whose metric lies within one bandwidth of ``metrica``.
        """
        curva = self.curva_score[self.curva_score['tipo_metrica'] == tipo_metrica]
        bw = curva['bandwidth'].iloc[0]

        no_minimo = metrica <= curva['metrica'].min()
        no_maximo = metrica >= curva['metrica'].max()

        if tipo_metrica == 'lscore':
            if no_minimo:
                return 0
            if no_maximo:
                return 1000
        else:
            if no_maximo:
                return 0
            if no_minimo:
                return 1000

        acima_do_piso = curva['metrica'] >= metrica-bw
        abaixo_do_teto = curva['metrica'] <= metrica+bw
        return curva[acima_do_piso & abaixo_do_teto]['score'].mean()

    def prop_quedas(self):
        """Flag series whose month-over-month variations never drop or always drop.

        Sets ``self.prop_queda`` to 0 when no variation is negative and to 1
        when every variation is negative; otherwise leaves it untouched.
        """
        variacoes = self.faturamentos[['valor']].diff().dropna()
        houve_queda = variacoes['valor'] < 0

        if not houve_queda.any():
            self.prop_queda = 0
        if houve_queda.all():
            self.prop_queda = 1

        return

    def calcula_tendencia(self):
        """Fit a line to the records in ``self.params`` scaled by their maximum value.

        The row position is the x axis. The slope goes to ``self.slope`` and
        the standard error of the fit to ``self.erro``.
        """
        serie = pd.DataFrame(self.params)
        serie["valor"] = serie["valor"]/serie["valor"].max()

        inclinacao, _, _, _, erro_padrao = stats.linregress(serie.index, serie['valor'])
        self.slope = inclinacao
        self.erro = erro_padrao
        return
    
    # probability of observing null revenue
    def probabilidade_faturamento_nulo(self):
        """Estimate how often revenue was (almost) null outside the last six months.

        Adds a "prop" column (value divided by the mean value) to
        ``self.faturamentos``. The probability is the number of months
        outside ``gera_periodo(periods=6)`` with "prop" at most 0.2, divided
        by the total number of months; it is written to
        ``self.probabilidade_zeros`` only if it is still None.
        """
        fat = self.faturamentos
        fat['prop'] = fat['valor']/fat['valor'].mean()

        meses_recentes = self.gera_periodo(periods=6)
        antigos = fat[~fat['data'].isin(meses_recentes)]

        # any value below 20% of the mean value counts as null revenue
        quase_nulos = antigos[antigos['prop'] <= 0.2]
        probabilidade = len(quase_nulos)/len(fat)

        if self.probabilidade_zeros is None:
            self.probabilidade_zeros = probabilidade
        return
    
    def calcula_zscore(self, score_inicial):
        """Penalise ``score_inicial`` by the probability of null revenue.

        Nothing happens unless ``self.probabilidade_zeros`` is positive and
        the series has at least 10 months. The penalised score is stored in
        ``self.zscore`` (and printed) only if it is still None.
        """
        if not (self.probabilidade_zeros > 0):
            return

        n = len(self.faturamentos)
        # the probability is only considered valid with at least 10 months
        if n < 10:
            return

        fator = (((-1) * n)/(n-6)) * self.probabilidade_zeros + 1
        score = score_inicial * fator
        if self.zscore is None:
            self.zscore = score
            print("ZSCORE : {}".format(score))
    @property
    def correcao(self):
        """Correction factor keyed by the number of months of history."""
        fatores = {}
        fatores[6] = 0.7
        fatores[7] = 0.8
        fatores[8] = 0.9
        return fatores
    
    def get_correcao(self, score):
        """Scale ``score`` by the correction for the history length (1 when none is defined)."""
        meses = len(self.faturamentos)
        fator = self.correcao.get(meses, 1)
        return fator*score

    def _finaliza_score(self, score_base):
        """Turn a base score into the final ``{'score': int}`` payload.

        The base score is averaged with the null-revenue score when
        ``calcula_zscore`` produced one, then scaled by ``get_correcao``.
        """
        self.calcula_zscore(score_base)
        score = score_base
        if self.zscore is not None:
            score = (self.zscore + score_base)/2
        return {'score': int(self.get_correcao(score))}

    def calcula(self):
        """Compute the final score for the company.

        Returns:
            ``{'score': value}`` where value is an int, or NaN when the
            company is not eligible (tomatico/padrao only) or has no revenue
            in the analysed window.
        """
        if self.produto == 'tomatico' or self.produto == "padrao":
            if not self.isElegible():
                return {'score': np.nan}

        self.gera_serie()
        if self.faturamentos.empty:
            return {"score": np.nan}

        now = datetime.now().date() - relativedelta(months=1)
        data_proposta = datetime(now.year, now.month, 1).date()

        # Drop the previous month from the series when it has zero revenue.
        if self.faturamentos[self.faturamentos['data'] == data_proposta]['valor'].iloc[0] == 0:
            self.faturamentos = self.faturamentos[self.faturamentos['data'] != data_proposta]

        self.data_maxima()
        self.outlier_6meses()
        self.calcula_volatilidade()
        self.crescimento_efetivo()
        self.calcula_pesos()
        self.probabilidade_faturamento_nulo()

        lscore = self.lscore()

        self.prop_quedas()
        self.calibracao()

        self.score_crescimento = self.get_score(metrica=lscore, tipo_metrica='lscore')

        # Revenue never dropped: the growth score is forced to the maximum.
        if self.prop_queda == 0:
            self.score_crescimento = 1000
            return self._finaliza_score(self.score_crescimento)

        # Revenue dropped every month: only the growth score is used.
        if self.prop_queda == 1:
            return self._finaliza_score(self.score_crescimento)

        # Trend over the records currently in self.params.
        self.calcula_tendencia()
        if self.slope < -0.2:
            return self._finaliza_score(self.score_crescimento)

        # Flat, well-fitted trend: volatility score derived from the fit error.
        if abs(self.slope) <= 0.01 and self.erro < 0.05:
            self.score_volatilidade = 1000*(1-self.erro)
            return self._finaliza_score(
                (2*self.score_crescimento + self.score_volatilidade)/3)

        # Re-check the trend using only the six most recent months.
        self.params = self.faturamentos.sort_values('data', ascending=False).iloc[:6, :].sort_values('data').to_dict('records')
        self.calcula_tendencia()
        if self.slope < -0.2:
            return self._finaliza_score(self.score_crescimento)

        self.score_volatilidade = int(self.get_score(metrica=self.volatilidade, tipo_metrica='vscore'))
        return self._finaliza_score(
            (2*self.score_crescimento + self.score_volatilidade)/3)

    
   

In [41]:
# from pricing.service.scoring.lscore import LScoring
from sqlalchemy import create_engine
import numpy as np
import pandas as pd
import requests
from time import sleep
from datetime import datetime
from conector.mysql import mysql_engine, CaptalysDBContext
from dateutil.relativedelta import relativedelta

class DScoring(object):
    def __init__(self, cnpj, produto, socios=False, baseline_type = 'lscore'):
        self.cnpj = cnpj
        self.doctype = 'cpf' if len(self.cnpj)<12 else 'cnpj'
        self.baseline_type = baseline_type
        self.score_socios = socios
        self.produto = produto
        self.lscore = None
        self.baseline = 1000
        self.fator_elegibilidade = 2
        self.faturamento_medio = None
        self.calibracao_segmento = None
        self.consulta = None
        self.estados_dividas = None
        self.dispersao_divida = None
        self.idade_empresa = None
        self.metricas = None
        
    def score_mestre(self):
        """Compute the revenue score and the mean monthly revenue.

        The revenue records loaded by ``LScoring`` are re-dated as
        consecutive months ending at the previous month before scoring.
        Sets ``self.lscore`` and ``self.faturamento_medio``; the latter is
        NaN when ``LScoring`` produced no revenue series.
        """
        ls = LScoring(cnpj=self.cnpj, produto=self.produto)

        df = pd.DataFrame(ls.params)
        ultimo_mes = datetime.now().date().replace(day=1) - relativedelta(months=1)
        datas = pd.date_range(end=ultimo_mes, periods=len(df), freq='MS')
        df['data'] = [el.date() for el in datas]

        ls.params = df.to_dict("records")
        self.lscore = ls.calcula().get('score')

        # LScoring.calcula() can return before building the series (company
        # not eligible), leaving faturamentos as None.
        faturamentos = ls.faturamentos
        if faturamentos is None or faturamentos.empty:
            self.faturamento_medio = np.nan
        else:
            self.faturamento_medio = faturamentos['valor'].mean()
        return
    
    
    @property
    def campos_divida(self):
        return {
                "restricoes" : ["data_ocorrencia", "modalidade_natureza", "natureza", "valor"],
                "protestos" : ["data_anotacao", "natureza", "sub_judice_descricao", "valor"],
                "pendencias" : ["data_ocorrencia", "modalidade", "natureza", "valor"],
                "processos" : ["data_ocorrencia", "descricao_natureza", "natureza", "valor"],
                "restricoes_financeiras" : ["data_ocorrencia", "modalidade_natureza", "natureza", "valor"]
               }
    @property
    def campos_rename(self):
        return {
                "processos" : {"descricao_natureza" : "modalidade_natureza"},
                "pendencias" : {"modalidade" : "modalidade_natureza"},
                "protestos" : {'sub_judice_descricao' : "modalidade_natureza", "data_anotacao" : "data_ocorrencia"}
                }
    
    @property
    def segmentos(self):
        return {"credito" : ['EMPRESCONTA', 'EMPRESTIMO', 'CREDCARTAO', 'FINANCIAMENT', 
                             'CREDITOEFINANCIAMENTO-FINANC'],
                "processos" : ['EXCJUDTRAB', 'FISCALESTADUAL', 'EXECUCAO', 'FISCALFEDERAL', 
                               'FISCALMUNICIPAL','EXECUCAO-JE', 'BUSCAEAPREENSAO'],
                "infra" : ['FATAGUA', 'TELEFFX', 'TELEFFIXA', 'TELEFMOVEL', 'CONDOMINIO', 
                           'ENERGIAELET', 'ALUGUEL', 'SERVTELEFON'] 
               }
    

    @staticmethod
    def get_numero_consulta(doc):
        """Return the report number of the most recent idwall consultation for ``doc``.

        Args:
            doc: CPF or CNPJ matched against the ``cnpj_cpf`` column.

        Raises:
            IndexError: when no consultation is stored for the document.
        """
        from sqlalchemy import text

        # NOTE(review): database credentials are hard-coded in this URL;
        # they should be moved to configuration / a secret store.
        engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
        # The document is bound as a parameter instead of being formatted
        # into the SQL text.
        query = text("select data_ref, numero_consulta from consultas_idwall_operacoes where cnpj_cpf=:doc")
        con = engine.connect()
        try:
            df = pd.read_sql(query, con, params={"doc": doc})
        finally:
            # Release the connection even when the query fails.
            con.close()

        if df.empty:
            raise IndexError("No idwall consultation found for document {}".format(doc))

        numero = df[df['data_ref'] == df['data_ref'].max()]["numero_consulta"].iloc[0]
        return numero
    
    @staticmethod
    def get_details(numero, timeout=600, intervalo=1):
        """Poll the idwall report until its status is "CONCLUIDO" and return it.

        Args:
            numero: report number used in the request URL.
            timeout: maximum number of seconds to keep polling; None polls
                without limit.
            intervalo: seconds to wait between two polls.

        Returns:
            the decoded JSON payload of the finished report.

        Raises:
            TimeoutError: when the report is not finished within ``timeout``.
        """
        from time import monotonic

        URL = "https://api-v2.idwall.co/relatorios"
        # NOTE(review): the API token is hard-coded; it should be moved to
        # configuration / a secret store.
        authorization = "b3818f92-5807-4acf-ade8-78a1f6d7996b"
        url_details = URL + "/{}".format(numero) + "/dados"

        limite = None if timeout is None else monotonic() + timeout
        while True:
            # Per-request timeout so a stalled connection cannot hang forever.
            dets = requests.get(url_details, headers={"authorization": authorization}, timeout=30)
            djson = dets.json()
            if djson['result']['status'] == "CONCLUIDO":
                return djson
            if limite is not None and monotonic() >= limite:
                raise TimeoutError(
                    "idwall report {} not finished after {} seconds".format(numero, timeout))
            sleep(intervalo)
    
    @staticmethod
    def formata_dados(df):
        df['modalidade_natureza'] = df.apply(lambda x : x['modalidade_natureza'].replace(" ", "") if isinstance(x['modalidade_natureza'], str) else "OUTROS", axis=1)
        df['valor'] = df.apply(lambda x : x['valor'].split("R$ ")[1].replace(",", "."), axis=1)
        df["valor"] = df.apply(lambda x : float(x["valor"]), axis=1)
        return df
    
    def get_infos_dividas(self, js, tp_pendencia):
        res = js.get("result").get(tp_pendencia)
        if not res is None:
            df = pd.DataFrame(res.get('itens'))
            cols = self.campos_divida.get(tp_pendencia)
            if "uf" in list(df.columns):
                cols = cols + ["uf"]
                df = df[cols].copy()
            else:
                df = df[cols]
                df["uf"] = None
            rename = self.campos_rename.get(tp_pendencia)
            if not rename is None:
                df.rename(columns = rename, inplace=True)
            df["tipo"] = tp_pendencia
            return df
        return None
    
    
    def gera_dados(self, doc):
        numero = self.get_numero_consulta(doc)
        js = self.get_details(numero)
        if len(doc) > 11:
            self.consulta = js
        fr = []
        lista_pendencias = ["restricoes", "processos", "protestos", "pendencias", "restricoes_financeiras"]
        for el in lista_pendencias:
            res = self.get_infos_dividas(js, el)
            if not res is None:
                fr.append(res)
        if len(fr) == 0:
            return pd.DataFrame()
        df = pd.concat(fr)
        df = self.formata_dados(df)
        if len(doc) > 11:
            self.estados_dividas = df["uf"].unique().tolist()
        return df
    
    def calcula_dispersao_divida(self):
        uf_cnpj = self.consulta.get("result").get("cnpj").get("localizacao").get("estado")
        lista_dispersao = [el for el in self.estados_dividas if el!= uf_cnpj]
        dispersao = len(lista_dispersao)/4
        self.dispersao_divida = dispersao
        return
    
    def get_idade(self):
        data_abertura = self.consulta.get("result").get("cnpj").get("data_abertura")
        data_abertura = data_abertura.replace("/", "-")
        data = datetime.strptime(data_abertura, "%d-%m-%Y").date()
        idade = ((datetime.now().date() - data).days/366)
        self.idade_empresa = np.around(idade, 2)
        return 

    
    def atribui_segmento(self, df):
        df['segmento'] = df.apply(lambda x : 'processos' if x['tipo']=='processos'
                              else('credito' if x['modalidade_natureza'] in self.segmentos.get("credito")
                                  else ('infra' if x['modalidade_natureza'] in self.segmentos.get("infra") else "outros")), axis=1)
        return df
    
    @staticmethod
    def calcula_probabilidade(df):
        dt = df.groupby("segmento").count().reset_index()[["segmento", "valor"]]
        dt.columns = ["segmento", "ocorrencias"]
        dt["probabilidade"] = dt["ocorrencias"]/dt["ocorrencias"].sum()
        return dt
    
    @staticmethod
    def calcula_composicao(df):
        dt = df.groupby("segmento").sum().reset_index()
        dt.columns = ["segmento", "valor_divida"]
        dt["composicao"] = dt["valor_divida"]/dt["valor_divida"].sum()
        return dt
    
    
    def calcula_pi(self, dfcalc):
        dfcalc['pi'] = dfcalc['valor_divida']/dfcalc['fat_medio']
        dfcalc['pi'] = (1/self.fator_elegibilidade)*dfcalc['pi']
        return dfcalc

    @property
    def peso_segmento(self):
        return {
            "credito" : 4,
            "processos" : 3,
            "infra" : 2,
            "outros" : 1
        }
    
    @property
    def fator_segmento(self):
        return {"credito" : 1, "processos" : 0.8, "infra" : 0.6, "outros" : 0.4}
    
    def lambda_(self, c, p, segmento):
        f = self.fator_segmento.get(segmento)
        return c*p*f
    
    def calcula_lambda(self, dfcalc):
        dfcalc["lambda"] = dfcalc.apply(lambda x : self.lambda_(x["composicao"], x["pi"], x["segmento"]), axis=1)
        return dfcalc
    
    @staticmethod
    def calcula_risco(dfcalc):
        dfcalc["risco"] = dfcalc["probabilidade"]*dfcalc["lambda"]
        return dfcalc
    
    @staticmethod
    def d_score(risco_, score_limite):
        return -score_limite*risco_ + score_limite


    def calcula_dscore(self, dfcalc):
        if self.baseline_type == 'lscore':
            score_limite = 1*self.lscore
        else:
            score_limite = self.baseline
        dfcalc["dscore"] = dfcalc.apply(lambda x : self.d_score(x["risco"], score_limite) if x["pi"] <=1 else 0, axis=1)
        return dfcalc
    
    def get_metricas(self, dfcalc):
        segmentos = ["credito", "processos", "infra", "outros"]
        final = {}
        for el in segmentos:
            dt = dfcalc[dfcalc["segmento"]==el]
            res = {}
            if dt.empty:
                res["num_ocorr"] = 0
                res["comp"] = 0
                res["risco"] = 0
                final[el] = res
            else:
                res["num_ocorr"] = dt["ocorrencias"].iloc[0]
                res["comp"] = dt['composicao'].iloc[0]
                res["risco"] = dt["risco"].iloc[0]
                final[el] = res
        self.metricas = final
        return
    
    def update_dataset(self):
        """Replace this company's row in ``credit_model.outlier_detection``.

        The row is built from ``self.metricas``, ``self.idade_empresa`` and
        ``self.dispersao_divida``. The delete of the previous row and the
        insert of the new one run in a single transaction.
        """
        from sqlalchemy import text

        registro = {
            "cnpj": self.cnpj,
            "produto": self.produto,
            "data_metricas": datetime.now().date(),
            "data_modelo": None,
        }
        sufixos = [("credito", "cr"), ("processos", "proc"),
                   ("infra", "infra"), ("outros", "out")]
        for prefixo, chave in [("num_ocorr", "num_ocorr"), ("comp", "comp"), ("risco", "risco")]:
            for segmento, sufixo in sufixos:
                registro["{}_{}".format(prefixo, sufixo)] = self.metricas.get(segmento).get(chave)
        registro["idade_empresa"] = self.idade_empresa
        registro["dispersao_divida"] = self.dispersao_divida
        registro["outlier"] = None

        df_metricas = pd.DataFrame([registro], columns=list(registro))

        # NOTE(review): database credentials are hard-coded in this URL;
        # they should be moved to configuration / a secret store.
        engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/credit_model")

        # engine.begin() commits on success, rolls back on error and always
        # releases the connection, so a failed insert cannot leave the
        # company without a row.
        with engine.begin() as con:
            con.execute(text("delete from outlier_detection where cnpj=:cnpj"),
                        {"cnpj": self.cnpj})
            df_metricas.to_sql('outlier_detection', schema='credit_model', con=con, if_exists='append', index=False)

        return
    
    def get_socios(self):
        """Return the CPFs of the partners registered for ``self.cnpj``.

        The lookup uses the 'moip' schema for the moip product and
        'credito-digital' otherwise. The list is also stored in
        ``self.lista_socios``.
        """
        from sqlalchemy import text

        schema = 'credito-digital' if self.produto != 'moip' else self.produto
        engine = mysql_engine(schema)
        # The document is bound as a parameter instead of being formatted
        # into the SQL text.
        # NOTE(review): assumes db.session is a SQLAlchemy session — confirm.
        query = text("select cpf from tb_Socio where cnpj=:cnpj")
        with CaptalysDBContext(engine) as db:
            res = db.session.execute(query, {"cnpj": self.cnpj}).fetchall()

        lista_socios = [el[0] for el in res]
        self.lista_socios = lista_socios
        return lista_socios
    
    def calcula_socios(self):
        lista_socios = self.get_socios()
        resp = []
        for el in lista_socios:
            _df = self.gera_dados(el)
            if not _df.empty:
                resp.append(_df)
        if len(resp) == 0:
            return np.nan
        
        df = pd.concat(resp)
        df = self.atribui_segmento(df)
        dfp = self.calcula_probabilidade(df)
        dfc = self.calcula_composicao(df)
        dfcalc = dfp.merge(dfc, left_on="segmento", right_on="segmento", how='left')
        dfcalc['fat_medio'] = self.faturamento_medio
        dfcalc = self.calcula_pi(dfcalc)
        dfcalc = self.calcula_lambda(dfcalc)
        
        dfcalc = self.calcula_risco(dfcalc)
        dfcalc = self.calcula_dscore(dfcalc)
        self.get_metricas(dfcalc)
        dscore = dfcalc['dscore'].mean()
        
        lista_segmentos = dfcalc["segmento"].tolist()
        lista_dscore = dfcalc["dscore"].tolist()
        lista_dscore = [int(el) for el in lista_dscore]
        res = dict(zip(lista_segmentos, lista_dscore))
        res["lscore"] = int(self.lscore)
        res['dscore'] = int(dscore)
        res['score'] = int((self.lscore + dscore)/2)
        return res, dfcalc
    
            
    def calcula(self, update=True):
        self.score_mestre()
        doc = self.cnpj
        df = self.gera_dados(doc)
        if df.empty:
            return {}, None
        self.calcula_dispersao_divida()
        self.get_idade()
        df = self.atribui_segmento(df)
        dfp = self.calcula_probabilidade(df)
        dfc = self.calcula_composicao(df)
        dfcalc = dfp.merge(dfc, left_on="segmento", right_on="segmento", how='left')

        dfcalc['fat_medio'] = self.faturamento_medio
        dfcalc = self.calcula_pi(dfcalc)
        dfcalc = self.calcula_lambda(dfcalc)
        dfcalc = self.calcula_risco(dfcalc)
        dfcalc = self.calcula_dscore(dfcalc)
        self.get_metricas(dfcalc)
        if update:
            self.update_dataset()
        
        dscore = dfcalc['dscore'].mean()
        
        lista_segmentos = dfcalc["segmento"].tolist()
        lista_dscore = dfcalc["dscore"].tolist()
        lista_dscore = [int(el) for el in lista_dscore]
        res = dict(zip(lista_segmentos, lista_dscore))
        res["lscore"] = int(self.lscore)
        res['dscore'] = int(dscore)
        res['score'] = int((self.lscore + dscore)/2)
        
        dfcalc["baseline_type"] = self.baseline_type
        dfcalc["baseline"] = self.lscore if self.baseline_type == 'lscore' else 1000
        dfcalc["fator_elegibilidade"] = self.fator_elegibilidade
        dfcalc["cnpj"] = self.cnpj
        dfcalc["produto"] = self.produto
        dfcalc["data_ref"] = datetime.now().date()
        return res, dfcalc

# if __name__ == '__main__':
#     ds = DScoring(cnpj='11018748001703', produto='justa')
#     res, dfcalc = ds.calcula()
#     print(dfcalc.columns)
#     print(res)


#### 1. Justa

In [42]:
# Load the distinct CNPJs flagged as approved (flag_aprovacao=1, flag_cnpj=1)
# in the Justa flow table.
# NOTE(review): database credentials are hard-coded in this URL; they should
# be moved to configuration / a secret store.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager closes the connection even when the query fails.
with engine.connect() as con:
    dfjusta = pd.read_sql("select distinct cnpj from fluxo_justa where flag_aprovacao=1 and flag_cnpj=1", con)

In [43]:
# (rows, columns) of the approved-CNPJ frame.
dfjusta.shape

(183, 1)

In [44]:
# Plain list of the CNPJs to score.
lista_justa = dfjusta["cnpj"].tolist()

In [45]:
from tqdm import tqdm_notebook

In [46]:
resp_dscore = []
resp_scores = []
err =[]

In [47]:
# Score every Justa CNPJ. Per CNPJ, one summary row goes to resp_scores and, when
# available, the per-segment frame goes to resp_dscore. Failures are collected in
# err so the run continues (best effort).
for el in tqdm_notebook(lista_justa):
    try:
        ds = DScoring(cnpj=el, produto='justa')
        res, dfcalc = ds.calcula()

        # calcula() returns ({}, None) when it has no rows to work with; in that
        # case there is no dscore and the lscore is used as the final score.
        if res:
            dscore, lscore, score = res.get("dscore"), res.get("lscore"), res.get("score")
        else:
            dscore, lscore, score = None, ds.lscore, ds.lscore

        df_scores = pd.DataFrame({
            "cnpj": [ds.cnpj],
            "produto": [ds.produto],
            "model_date": [datetime(2019, 5, 1).date()],
            "dscore": [dscore],
            "lscore": [lscore],
            "score": [score],
            "data_atualizacao": [datetime.now().date()],
        })
        resp_scores.append(df_scores)

        if dfcalc is not None:
            resp_dscore.append(dfcalc)
    except Exception as exc:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit stop the
        # loop; the cause is printed so failures can be diagnosed afterwards.
        print("ERROR {}: {!r}".format(el, exc))
        err.append(el)

HBox(children=(IntProgress(value=0, max=183), HTML(value='')))

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




ERROR
ERROR
ERROR
ERROR
ERROR


In [48]:
err

['03869173000139',
 '08744783000120',
 '08793847000182',
 '17815262000107',
 '71102289000106']

In [49]:
dfscore = pd.concat(resp_scores)

In [50]:
# Append the Justa summary scores to credit_model.credit_score.
# NOTE: if_exists='append' means re-running this cell inserts the same rows again.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the insert raises.
with engine.connect() as con:
    dfscore.to_sql("credit_score", schema='credit_model', con=con, if_exists='append', index=False)

In [51]:
df_dscore = pd.concat(resp_dscore)

In [52]:
df_dscore.columns

Index(['segmento', 'ocorrencias', 'probabilidade', 'valor_divida',
       'composicao', 'fat_medio', 'pi', 'lambda', 'risco', 'dscore',
       'baseline_type', 'baseline', 'fator_elegibilidade', 'cnpj', 'produto',
       'data_ref'],
      dtype='object')

In [53]:
df_dscore.head()

Unnamed: 0,segmento,ocorrencias,probabilidade,valor_divida,composicao,fat_medio,pi,lambda,risco,dscore,baseline_type,baseline,fator_elegibilidade,cnpj,produto,data_ref
0,outros,2,1.0,6864.04,1.0,25407.955714,0.135077,0.054031,0.05403064,645.151106,lscore,682,2,1246934000170,justa,2019-06-16
0,outros,3,1.0,712.66,1.0,29389.971429,0.012124,0.00485,0.004849681,576.192034,lscore,579,2,2125400000159,justa,2019-06-16
0,credito,2,0.666667,18087.32,0.996161,18350.365,0.492833,0.490941,0.3272939,110.3238,lscore,164,2,2595814000141,justa,2019-06-16
1,outros,1,0.333333,69.7,0.003839,18350.365,0.001899,3e-06,9.72042e-07,163.999841,lscore,164,2,2595814000141,justa,2019-06-16
0,outros,3,1.0,1528.83,1.0,11645.454286,0.065641,0.026256,0.02625625,650.460823,lscore,668,2,2802892000170,justa,2019-06-16


In [54]:
# Rename the per-row 'dscore' column to 'dscore_segmento' (rebinding instead of inplace).
df_dscore = df_dscore.rename(columns={"dscore": "dscore_segmento"})

In [55]:
# Append the Justa per-segment scores to credit_model.debt_score.
# NOTE: if_exists='append' means re-running this cell inserts the same rows again.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the insert raises.
with engine.connect() as con:
    df_dscore.to_sql("debt_score", schema='credit_model', con=con, if_exists='append', index=False)

In [56]:
resp_dscore = []
err2 =[]

In [57]:
# Recompute the per-segment frames for Justa with baseline_type='padrao'.
# Failures are collected in err2 so the run continues (best effort).
for el in tqdm_notebook(lista_justa):
    try:
        ds = DScoring(cnpj=el, produto='justa', baseline_type='padrao')
        # update=False makes calcula() skip its update_dataset() step.
        res, dfcalc = ds.calcula(update=False)

        if dfcalc is not None:
            resp_dscore.append(dfcalc)
    except Exception as exc:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit stop the
        # loop; the cause is printed so failures can be diagnosed afterwards.
        print("ERROR {}: {!r}".format(el, exc))
        err2.append(el)

HBox(children=(IntProgress(value=0, max=183), HTML(value='')))

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




ERROR
ERROR
ERROR
ERROR
ERROR


In [58]:
df_dscore = pd.concat(resp_dscore)

In [59]:
# Rename the per-row 'dscore' column to 'dscore_segmento' (rebinding instead of inplace).
df_dscore = df_dscore.rename(columns={"dscore": "dscore_segmento"})

In [60]:
df_dscore.sort_values('dscore_segmento').head()

Unnamed: 0,segmento,ocorrencias,probabilidade,valor_divida,composicao,fat_medio,pi,lambda,risco,dscore_segmento,baseline_type,baseline,fator_elegibilidade,cnpj,produto,data_ref
0,credito,4,1.0,167940.29,1.0,190177.514286,0.441536,0.441536,0.441536,558.464389,padrao,1000,2,8992524000118,justa,2019-06-16
0,credito,2,0.666667,18087.32,0.996161,18350.365,0.492833,0.490941,0.327294,672.7061,padrao,1000,2,2595814000141,justa,2019-06-16
0,outros,6,1.0,7948.28,1.0,5428.818571,0.732045,0.292818,0.292818,707.181963,padrao,1000,2,24554146000147,justa,2019-06-16
0,outros,48,1.0,18021.41,1.0,18304.723333,0.492261,0.196904,0.196904,803.095522,padrao,1000,2,21969975000100,justa,2019-06-16
0,credito,1,1.0,7532.93,1.0,19184.322857,0.19633,0.19633,0.19633,803.669641,padrao,1000,2,3300735000129,justa,2019-06-16


In [61]:
# Append the Justa 'padrao'-baseline per-segment scores to credit_model.debt_score.
# NOTE: if_exists='append' means re-running this cell inserts the same rows again.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the insert raises.
with engine.connect() as con:
    df_dscore.to_sql("debt_score", schema='credit_model', con=con, if_exists='append', index=False)

#### 2. PagueVeloz

In [62]:
# Load the distinct approved documents from fluxo_pv (flag_aprovacao=1 and flag_cnpj=1),
# exposing the cpf_cnpj column as `cnpj`.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the query raises.
with engine.connect() as con:
    dfpv = pd.read_sql("select distinct cpf_cnpj as cnpj from fluxo_pv where flag_aprovacao=1 and flag_cnpj=1", con)

In [63]:
# Discard the first row of the query result before building the CNPJ list.
# NOTE(review): the reason is not shown here; presumably that row holds a
# blank/invalid cpf_cnpj. TODO confirm and filter on the value instead.
# This cell is not idempotent: every re-run drops one more row from dfpv.
dfpv = dfpv.iloc[1:, :]

In [64]:
lista_pv = dfpv['cnpj'].unique().tolist()

In [65]:
# Strip the mask punctuation (".", "-", "/") from every document string.
# NOTE(review): any other separator is kept as-is, e.g. a stray comma survives.
lista_pv = ["".join(ch for ch in el if ch not in ".-/") for el in lista_pv]

In [66]:
# Number of PagueVeloz documents to score.
len(lista_pv)

640

In [68]:
resp_dscore = []
resp_scores = []
err3 =[]

In [69]:
# Score every PagueVeloz CNPJ. Per CNPJ, one summary row goes to resp_scores and,
# when available, the per-segment frame goes to resp_dscore. Failures are collected
# in err3 so the run continues (best effort).
for el in tqdm_notebook(lista_pv):
    try:
        ds = DScoring(cnpj=el, produto='pagueveloz')
        res, dfcalc = ds.calcula()

        # calcula() returns ({}, None) when it has no rows to work with; in that
        # case there is no dscore and the lscore is used as the final score.
        if res:
            dscore, lscore, score = res.get("dscore"), res.get("lscore"), res.get("score")
        else:
            dscore, lscore, score = None, ds.lscore, ds.lscore

        df_scores = pd.DataFrame({
            "cnpj": [ds.cnpj],
            "produto": [ds.produto],
            "model_date": [datetime(2019, 5, 1).date()],
            "dscore": [dscore],
            "lscore": [lscore],
            "score": [score],
            "data_atualizacao": [datetime.now().date()],
        })
        resp_scores.append(df_scores)

        if dfcalc is not None:
            resp_dscore.append(dfcalc)
    except Exception as exc:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit stop the
        # loop; the cause is printed so failures can be diagnosed afterwards.
        print("ERROR {}: {!r}".format(el, exc))
        err3.append(el)

HBox(children=(IntProgress(value=0, max=640), HTML(value='')))

ZSCORE : 632.218181818182
ERROR
ZSCORE : 553.2727272727273
ZSCORE : 603.7121212121211
ZSCORE : 496.2198347107437
ZSCORE : 581.5555555555557
ZSCORE : 569.8888888888889
ZSCORE : 728.8139866424059
ZSCORE : 627.6792929292928
ZSCORE : 520.1045454545455
ZSCORE : 582.5856550405249
ZSCORE : 532.7348484848486
ZSCORE : 697.1994949494949


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




ZSCORE : 540.810606060606
ZSCORE : 618.469696969697
ZSCORE : 718.0991325343552
ZSCORE : 271.1085858585859
ZSCORE : 591.7676767676768
ZSCORE : 504.5868377969357
ZSCORE : 617.479797979798
ZSCORE : 619.8681818181818
ZSCORE : 608.5980957936675
ZSCORE : 594.2107455271371
ZSCORE : 440.8848484848486
ZSCORE : 604.0681818181818
ZSCORE : 482.9890909090909
ERROR
ZSCORE : 423.5893939393939
ZSCORE : 670.60101010101
ZSCORE : 265.1363636363637
ZSCORE : 442.94318181818176
ZSCORE : 698.39898989899
ZSCORE : 634.89898989899
ZSCORE : 589.6182279251765
ZSCORE : 442.7910213243547
ZSCORE : 423.6666666666667
ZSCORE : 470.2767957351292
ZSCORE : 574.2957575757576
ZSCORE : 450.86181818181814
ERROR
ZSCORE : 592.8984736682388
ZSCORE : 675.3418218471263
ZSCORE : 555.1388888888889
ZSCORE : 509.94999999999993
ZSCORE : 665.631313131313
ZSCORE : 619.1515151515152
ZSCORE : 550.1534576534577
ZSCORE : 696.6157575757577
ZSCORE : 501.729292929293
ZSCORE : 610.6363636363637
ZSCORE : 374.1703639350712
ZSCORE : 673.95454545454

In [70]:
err3

['01024429000181',
 '07274536000144',
 '10190517000144',
 '17651859000163',
 '21747326000157',
 '26,702316000119',
 '31956991000104',
 '69095586000111']

In [71]:
dfscore = pd.concat(resp_scores)

In [73]:
dfscore.head()

Unnamed: 0,cnpj,produto,model_date,dscore,lscore,score,data_atualizacao
0,225356000123,pagueveloz,2019-05-01,473.0,474,473,2019-06-16
0,354700000184,pagueveloz,2019-05-01,,769,769,2019-06-16
0,549449000103,pagueveloz,2019-05-01,613.0,615,614,2019-06-16
0,560279000168,pagueveloz,2019-05-01,,728,728,2019-06-16
0,575851000162,pagueveloz,2019-05-01,,716,716,2019-06-16


In [74]:
# Append the PagueVeloz summary scores to credit_model.credit_score.
# NOTE: if_exists='append' means re-running this cell inserts the same rows again.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the insert raises.
with engine.connect() as con:
    dfscore.to_sql("credit_score", schema='credit_model', con=con, if_exists='append', index=False)

In [75]:
dfscore['delta'] = dfscore["lscore"] - dfscore["score"]

In [77]:
dfscore.sort_values("delta", ascending=False).head()

Unnamed: 0,cnpj,produto,model_date,dscore,lscore,score,data_atualizacao,delta
0,4509695000192,pagueveloz,2019-05-01,208,767,487,2019-06-16,280
0,661205000118,pagueveloz,2019-05-01,399,737,568,2019-06-16,169
0,15523647000175,pagueveloz,2019-05-01,442,741,591,2019-06-16,150
0,15684485000157,pagueveloz,2019-05-01,440,683,561,2019-06-16,122
0,7263836000128,pagueveloz,2019-05-01,598,813,705,2019-06-16,108


In [78]:
df_dscore = pd.concat(resp_dscore)

In [80]:
# Rename the per-row 'dscore' column to 'dscore_segmento' (rebinding instead of inplace).
df_dscore = df_dscore.rename(columns={"dscore": "dscore_segmento"})

In [82]:
df_dscore[df_dscore["cnpj"]=='04509695000192']

Unnamed: 0,segmento,ocorrencias,probabilidade,valor_divida,composicao,fat_medio,pi,lambda,risco,dscore_segmento,baseline_type,baseline,fator_elegibilidade,cnpj,produto,data_ref
0,credito,1,1.0,34146.03,1.0,23433.760833,0.728565,0.728565,0.728565,208.190742,lscore,767,2,4509695000192,pagueveloz,2019-06-16


In [83]:
# Append the PagueVeloz per-segment scores to credit_model.debt_score.
# NOTE: if_exists='append' means re-running this cell inserts the same rows again.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the insert raises.
with engine.connect() as con:
    df_dscore.to_sql("debt_score", schema='credit_model', con=con, if_exists='append', index=False)

In [84]:
resp_dscore = []
err4 =[]

In [85]:
# Recompute the per-segment frames for PagueVeloz with baseline_type='padrao'.
# Failures are collected in err4 so the run continues (best effort).
for el in tqdm_notebook(lista_pv):
    try:
        ds = DScoring(cnpj=el, produto='pagueveloz', baseline_type='padrao')
        # update=False makes calcula() skip its update_dataset() step.
        res, dfcalc = ds.calcula(update=False)

        if dfcalc is not None:
            resp_dscore.append(dfcalc)
    except Exception as exc:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit stop the
        # loop; the cause is printed so failures can be diagnosed afterwards.
        print("ERROR {}: {!r}".format(el, exc))
        err4.append(el)

HBox(children=(IntProgress(value=0, max=640), HTML(value='')))

ZSCORE : 632.218181818182
ERROR
ZSCORE : 553.2727272727273
ZSCORE : 603.7121212121211
ZSCORE : 496.2198347107437
ZSCORE : 581.5555555555557
ZSCORE : 569.8888888888889
ZSCORE : 728.8139866424059
ZSCORE : 627.6792929292928
ZSCORE : 520.1045454545455
ZSCORE : 582.5856550405249
ZSCORE : 532.7348484848486
ZSCORE : 697.1994949494949


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




ZSCORE : 540.810606060606
ZSCORE : 618.469696969697
ZSCORE : 718.0991325343552
ZSCORE : 271.1085858585859
ZSCORE : 591.7676767676768
ZSCORE : 504.5868377969357
ZSCORE : 617.479797979798
ZSCORE : 619.8681818181818
ZSCORE : 608.5980957936675
ZSCORE : 594.2107455271371
ZSCORE : 440.8848484848486
ZSCORE : 604.0681818181818
ZSCORE : 482.9890909090909
ERROR
ZSCORE : 423.5893939393939
ZSCORE : 670.60101010101
ZSCORE : 265.1363636363637
ZSCORE : 442.94318181818176
ZSCORE : 698.39898989899
ZSCORE : 634.89898989899
ZSCORE : 589.6182279251765
ZSCORE : 442.7910213243547
ZSCORE : 423.6666666666667
ZSCORE : 470.2767957351292
ZSCORE : 574.2957575757576
ZSCORE : 450.86181818181814
ERROR
ZSCORE : 592.8984736682388
ZSCORE : 675.3418218471263
ZSCORE : 555.1388888888889
ZSCORE : 509.94999999999993
ZSCORE : 665.631313131313
ZSCORE : 619.1515151515152
ZSCORE : 550.1534576534577
ZSCORE : 696.6157575757577
ZSCORE : 501.729292929293
ZSCORE : 610.6363636363637
ZSCORE : 374.1703639350712
ZSCORE : 673.95454545454

In [86]:
err4

['01024429000181',
 '07274536000144',
 '10190517000144',
 '17651859000163',
 '21747326000157',
 '26,702316000119',
 '31956991000104',
 '69095586000111']

In [88]:
df_dscore = pd.concat(resp_dscore)

In [90]:
# Rename the per-row 'dscore' column to 'dscore_segmento' (rebinding instead of inplace).
df_dscore = df_dscore.rename(columns={"dscore": "dscore_segmento"})

In [92]:
df_dscore.head()

Unnamed: 0,segmento,ocorrencias,probabilidade,valor_divida,composicao,fat_medio,pi,lambda,risco,dscore_segmento,baseline_type,baseline,fator_elegibilidade,cnpj,produto,data_ref
0,outros,1,1.0,174.98,1.0,21027.5,0.004161,0.001664,0.001664,998.335703,padrao,1000,2,225356000123,pagueveloz,2019-06-16
0,outros,1,1.0,145.92,1.0,10765.142857,0.006777,0.002711,0.002711,997.289028,padrao,1000,2,549449000103,pagueveloz,2019-06-16
0,credito,1,1.0,27496.75,1.0,30008.671667,0.458147,0.458147,0.458147,541.853263,padrao,1000,2,661205000118,pagueveloz,2019-06-16
0,outros,7,1.0,2377.44,1.0,13352.575,0.089026,0.03561,0.03561,964.38979,padrao,1000,2,852832000136,pagueveloz,2019-06-16
0,outros,1,1.0,9530.28,1.0,11474.720833,0.415273,0.166109,0.166109,833.890861,padrao,1000,2,1344355000160,pagueveloz,2019-06-16


In [93]:
# Append the PagueVeloz 'padrao'-baseline per-segment scores to credit_model.debt_score.
# NOTE: if_exists='append' means re-running this cell inserts the same rows again.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the insert raises.
with engine.connect() as con:
    df_dscore.to_sql("debt_score", schema='credit_model', con=con, if_exists='append', index=False)

#### 3. Wirecard

In [94]:
# Load the distinct approved CNPJs from fluxo_wirecard (flag_aprovacao=1 and flag_cnpj=1).
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the query raises.
with engine.connect() as con:
    dfw = pd.read_sql("select distinct cnpj from fluxo_wirecard where flag_aprovacao=1 and flag_cnpj=1", con)

In [96]:
dfw.shape

(670, 1)

In [98]:
# Wirecard CNPJs as a plain list; the cell displays how many there are.
lista_wire = dfw["cnpj"].tolist()
len(lista_wire)

670

In [99]:
resp_dscore = []
resp_scores = []
err5 =[]

In [106]:
el =lista_wire[10]
el

'01056417000139'

In [107]:
ds = DScoring(cnpj=el, produto='wirecard')
res, dfcalc = ds.calcula(update=False)

In [108]:
res

{'outros': 735, 'lscore': 737, 'dscore': 735, 'score': 736}

In [109]:
dfcalc

Unnamed: 0,segmento,ocorrencias,probabilidade,valor_divida,composicao,fat_medio,pi,lambda,risco,dscore,baseline_type,baseline,fator_elegibilidade,cnpj,produto,data_ref
0,outros,1,1.0,277.9,1.0,27652.575,0.005025,0.00201,0.00201,735.518675,lscore,737,2,1056417000139,wirecard,2019-06-16


In [110]:
# Score every Wirecard CNPJ. Per CNPJ, one summary row goes to resp_scores and,
# when available, the per-segment frame goes to resp_dscore. Failures are collected
# in err5 so the run continues (best effort).
for el in tqdm_notebook(lista_wire):
    try:
        ds = DScoring(cnpj=el, produto='wirecard')
        res, dfcalc = ds.calcula()

        # calcula() returns ({}, None) when it has no rows to work with; in that
        # case there is no dscore and the lscore is used as the final score.
        if res:
            dscore, lscore, score = res.get("dscore"), res.get("lscore"), res.get("score")
        else:
            dscore, lscore, score = None, ds.lscore, ds.lscore

        df_scores = pd.DataFrame({
            "cnpj": [ds.cnpj],
            "produto": [ds.produto],
            "model_date": [datetime(2019, 6, 1).date()],
            "dscore": [dscore],
            "lscore": [lscore],
            "score": [score],
            "data_atualizacao": [datetime.now().date()],
        })
        resp_scores.append(df_scores)

        if dfcalc is not None:
            resp_dscore.append(dfcalc)
    except Exception as exc:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit stop the
        # loop; the cause is printed so failures can be diagnosed afterwards.
        print("ERROR {}: {!r}".format(el, exc))
        err5.append(el)

HBox(children=(IntProgress(value=0, max=670), HTML(value='')))

ZSCORE : 398.89203417327775
ZSCORE : 608.3975757575757
ZSCORE : 476.3555555555556
ZSCORE : 683.888484848485
ZSCORE : 506.540606060606
ZSCORE : 340.7372603588985
ZSCORE : 383.5919191919191


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




ZSCORE : 598.7121212121212
ZSCORE : 567.0808080808081
ZSCORE : 656.2666666666668
ZSCORE : 723.6969696969697
ZSCORE : 531.5479797979798
ZSCORE : 833.3333333333334
ZSCORE : 584.2327272727274
ZSCORE : 603.2717195766513
ZSCORE : 568.1987878787878
ZSCORE : 489.82828282828274
ZSCORE : 567.9468214811259
ZSCORE : 621.9577205607138
ZSCORE : 244.063973063973
ZSCORE : 415.62626262626264
ZSCORE : 582.1881671081613
ZSCORE : 282.94725028058355
ZSCORE : 407.21999999999997
ZSCORE : 588.098106835275
ZSCORE : 370.77441077441074
ZSCORE : 338.72424242424245
ZSCORE : 563.5414910366653
ZSCORE : 0.0
ZSCORE : 423.8141414141413
ZSCORE : 522.2585858585859
ZSCORE : 198.8383838383838
ZSCORE : 622.479797979798
ZSCORE : 643.0
ZSCORE : 672.631313131313
ZSCORE : 662.4242424242424
ZSCORE : 302.0
ZSCORE : 566.9191919191919
ZSCORE : 502.56590909090903
ZSCORE : 332.6257575757575
ZSCORE : 602.7313109263454
ZSCORE : 692.1969696969697
ZSCORE : 454.06545454545466
ERROR
ZSCORE : 508.6343434343435
ZSCORE : 338.0808080808081
ER

In [111]:
# Number of Wirecard CNPJs whose scoring raised.
len(err5)

4

In [112]:
dfscore = pd.concat(resp_scores)

In [114]:
dfscore.head()

Unnamed: 0,cnpj,produto,model_date,dscore,lscore,score,data_atualizacao
0,18460000147,wirecard,2019-06-01,,710,710,2019-06-16
0,195664000153,wirecard,2019-06-01,,660,660,2019-06-16
0,240629000109,wirecard,2019-06-01,,641,641,2019-06-16
0,272073000302,wirecard,2019-06-01,,701,701,2019-06-16
0,467109000133,wirecard,2019-06-01,,438,438,2019-06-16


In [115]:
# Append the Wirecard summary scores to credit_model.credit_score.
# NOTE: if_exists='append' means re-running this cell inserts the same rows again.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the insert raises.
with engine.connect() as con:
    dfscore.to_sql("credit_score", schema='credit_model', con=con, if_exists='append', index=False)

In [116]:
dfscore["delta"] = dfscore["lscore"] - dfscore["score"]

In [118]:
dfscore.sort_values('delta', ascending=False).head()

Unnamed: 0,cnpj,produto,model_date,dscore,lscore,score,data_atualizacao,delta
0,10988444000130,wirecard,2019-06-01,143,407,275,2019-06-16,132
0,22504536000187,wirecard,2019-06-01,593,806,699,2019-06-16,107
0,12007794000102,wirecard,2019-06-01,802,998,900,2019-06-16,98
0,3347828000109,wirecard,2019-06-01,529,725,627,2019-06-16,98
0,17044972000180,wirecard,2019-06-01,480,673,576,2019-06-16,97


In [119]:
df_dscore = pd.concat(resp_dscore)

In [121]:
# Rename the per-row 'dscore' column to 'dscore_segmento' (rebinding instead of inplace).
df_dscore = df_dscore.rename(columns={"dscore": "dscore_segmento"})

In [124]:
# Append the Wirecard per-segment scores to credit_model.debt_score.
# NOTE: if_exists='append' means re-running this cell inserts the same rows again.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the insert raises.
with engine.connect() as con:
    df_dscore.to_sql("debt_score", schema='credit_model', con=con, if_exists='append', index=False)

In [125]:
resp_dscore = []
err6 =[]

In [126]:
# Recompute the per-segment frames for Wirecard with baseline_type='padrao'.
# Failures are collected in err6 so the run continues (best effort).
for el in tqdm_notebook(lista_wire):
    try:
        ds = DScoring(cnpj=el, produto='wirecard', baseline_type='padrao')
        # update=False makes calcula() skip its update_dataset() step.
        res, dfcalc = ds.calcula(update=False)

        if dfcalc is not None:
            resp_dscore.append(dfcalc)
    except Exception as exc:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit stop the
        # loop; the cause is printed so failures can be diagnosed afterwards.
        print("ERROR {}: {!r}".format(el, exc))
        err6.append(el)

HBox(children=(IntProgress(value=0, max=670), HTML(value='')))

ZSCORE : 398.89203417327775
ZSCORE : 608.3975757575757
ZSCORE : 476.3555555555556
ZSCORE : 683.888484848485
ZSCORE : 506.540606060606
ZSCORE : 340.7372603588985
ZSCORE : 383.5919191919191


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




ZSCORE : 598.7121212121212
ZSCORE : 567.0808080808081
ZSCORE : 656.2666666666668
ZSCORE : 723.6969696969697
ZSCORE : 531.5479797979798
ZSCORE : 833.3333333333334
ZSCORE : 584.2327272727274
ZSCORE : 603.2717195766513
ZSCORE : 568.1987878787878
ZSCORE : 489.82828282828274
ZSCORE : 567.9468214811259
ZSCORE : 621.9577205607138
ZSCORE : 244.063973063973
ZSCORE : 415.62626262626264
ZSCORE : 582.1881671081613
ZSCORE : 282.94725028058355
ZSCORE : 407.21999999999997
ZSCORE : 588.098106835275
ZSCORE : 370.77441077441074
ZSCORE : 338.72424242424245
ZSCORE : 563.5414910366653
ZSCORE : 0.0
ZSCORE : 423.8141414141413
ZSCORE : 522.2585858585859
ZSCORE : 198.8383838383838
ZSCORE : 622.479797979798
ZSCORE : 643.0
ZSCORE : 672.631313131313
ZSCORE : 662.4242424242424
ZSCORE : 302.0
ZSCORE : 566.9191919191919
ZSCORE : 502.56590909090903
ZSCORE : 332.6257575757575
ZSCORE : 602.7313109263454
ZSCORE : 692.1969696969697
ZSCORE : 454.06545454545466
ERROR
ZSCORE : 508.6343434343435
ZSCORE : 338.0808080808081
ER

In [127]:
err6

['18571148000141', '19278541000295', '19878202000169', '27643911000193']

In [128]:
df_dscore = pd.concat(resp_dscore)

In [130]:
# Rename the per-row 'dscore' column to 'dscore_segmento' (rebinding instead of inplace).
df_dscore = df_dscore.rename(columns={"dscore": "dscore_segmento"})

In [132]:
df_dscore.head()

Unnamed: 0,segmento,ocorrencias,probabilidade,valor_divida,composicao,fat_medio,pi,lambda,risco,dscore_segmento,baseline_type,baseline,fator_elegibilidade,cnpj,produto,data_ref
0,outros,1,1.0,277.9,1.0,27652.575,0.005025,0.00201,0.00201,997.990061,padrao,1000,2,1056417000139,wirecard,2019-06-16
0,outros,1,1.0,2414.14,1.0,21659.558333,0.055729,0.022292,0.022292,977.708317,padrao,1000,2,1799108000159,wirecard,2019-06-16
0,outros,4,1.0,3173.38,1.0,12124.92,0.130862,0.052345,0.052345,947.655242,padrao,1000,2,2392887000136,wirecard,2019-06-16
0,processos,1,1.0,7554.62,1.0,22063.514167,0.171202,0.136961,0.136961,863.038681,padrao,1000,2,2800667000102,wirecard,2019-06-16
0,outros,20,1.0,14946.19,1.0,11084.859167,0.674171,0.269669,0.269669,730.331441,padrao,1000,2,3347828000109,wirecard,2019-06-16


In [133]:
# Append the Wirecard 'padrao'-baseline per-segment scores to credit_model.debt_score.
# NOTE: if_exists='append' means re-running this cell inserts the same rows again.
# FIXME(security): the URL below embeds a plaintext user/password; move it to an
# environment variable or secret store and rotate the credential.
engine = create_engine("mysql+pymysql://capMaster:#jackpot123#@captalys.cmrbivuuu7sv.sa-east-1.rds.amazonaws.com:23306/varejo")
# The context manager releases the connection even if the insert raises.
with engine.connect() as con:
    df_dscore.to_sql("debt_score", schema='credit_model', con=con, if_exists='append', index=False)

In [134]:
# Single-column frame ('erro') listing every CNPJ collected in the six error lists.
df = pd.DataFrame({"erro": err + err2 + err3 + err4 + err5 + err6})

In [136]:
df.to_excel("erro_calculo_dscore.xlsx")