# Bibliotecas

In [25]:
from pathlib import Path

import numpy as np
import pandas as pd

# Dados

## 📊 Dicionário de Dados (Data Dictionary)

### **Dataset: `application_record.csv`**

| Nome da Coluna           | Descrição                                                                 | Observações                                                                 |
|--------------------------|---------------------------------------------------------------------------|----------------------------------------------------------------------------|
| **ID**                   | Número único de identificação do cliente                                 |                                                                             |
| **CODE_GENDER**          | Gênero do cliente                                                        | Valores: `M` (Masculino), `F` (Feminino)                                   |
| **FLAG_OWN_CAR**         | Indica se o cliente possui carro                                         | Valores: `Y` (Sim), `N` (Não)                                              |
| **FLAG_OWN_REALTY**      | Indica se o cliente possui imóvel                                        | Valores: `Y` (Sim), `N` (Não)                                              |
| **CNT_CHILDREN**         | Número de filhos do cliente                                              |                                                                            |
| **AMT_INCOME_TOTAL**     | Renda anual total do cliente                                             | Em moeda local                                                             |
| **NAME_INCOME_TYPE**     | Categoria de renda do cliente                                            | Ex: "Working", "Commercial associate", "Pensioner"                         |
| **NAME_EDUCATION_TYPE**  | Nível de educação do cliente                                             | Ex: "Secondary / secondary special", "Higher education"                    |
| **NAME_FAMILY_STATUS**   | Estado civil do cliente                                                  | Ex: "Married", "Single / not married", "Civil marriage"                    |
| **NAME_HOUSING_TYPE**    | Tipo de moradia do cliente                                               | Ex: "House / apartment", "Rented apartment"                                |
| **DAYS_BIRTH**           | Data de nascimento em dias                                               | Contagem regressiva a partir da data atual (valor negativo). Ex: -12000 ≈ 32.8 anos |
| **DAYS_EMPLOYED**        | Data de início do emprego em dias                                        | Contagem regressiva a partir da data atual (valor negativo). Valores positivos indicam desemprego atual (máximo observado nos dados: 365243) |
| **FLAG_MOBIL**           | Indica se o cliente possui telefone móvel                                | Valores: `1` (Sim), `0` (Não)                                              |
| **FLAG_WORK_PHONE**      | Indica se o cliente possui telefone corporativo                          | Valores: `1` (Sim), `0` (Não)                                              |
| **FLAG_PHONE**           | Indica se o cliente possui telefone fixo                                 | Valores: `1` (Sim), `0` (Não)                                              |
| **FLAG_EMAIL**           | Indica se o cliente possui e-mail                                        | Valores: `1` (Sim), `0` (Não)                                              |
| **OCCUPATION_TYPE**      | Tipo de ocupação profissional                                            | Ex: "Laborers", "Core staff", "Managers". Contém valores ausentes (304354 de 438557 registros preenchidos) |
| **CNT_FAM_MEMBERS**      | Número total de membros da família                                       |                                                                             |

---

### **Dataset: `credit_record.csv`**

| Nome da Coluna        | Descrição                                                                 | Observações                                                                 |
|-----------------------|---------------------------------------------------------------------------|----------------------------------------------------------------------------|
| **ID**                | Número único de identificação do cliente                                 | Corresponde ao ID em `application_record.csv`                              |
| **MONTHS_BALANCE**    | Mês de referência do registro                                            | Contagem regressiva a partir do mês atual. `0` = mês atual, `-1` = anterior |
| **STATUS**            | Status de pagamento no mês                                               | Valores: <br>`C` = Pagamento realizado no mês <br>`X` = Sem empréstimo no mês <br>`0` = 1-29 dias de atraso <br>`1` = 30-59 dias de atraso <br>`2` = 60-89 dias de atraso <br>`3` = 90-119 dias de atraso <br>`4` = 120-149 dias de atraso <br>`5` = 150+ dias de atraso ou inadimplência grave |

## Carregando Dados

In [None]:
# Directory holding the untouched source CSVs, relative to the notebook's
# working directory. Kept in one place so the location can be changed once.
RAW_DATA_DIR = Path("../data/raw")

# One row per client application (see the data dictionary above).
application = pd.read_csv(RAW_DATA_DIR / "application_record.csv")
# One row per client per month with the payment status for that month.
credit = pd.read_csv(RAW_DATA_DIR / "credit_record.csv")

## Verificando Dados

### Application

In [19]:
# Show the (rows, columns) dimensions of the application table.
print(f"Application shape: {application.shape!s}")

Application shape: (438557, 18)


In [21]:
# Preview the first five application rows to check column names and values.
application.head(n=5)

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS
0,5008804,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,-12005,-4542,1,1,0,0,,2.0
1,5008805,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,-12005,-4542,1,1,0,0,,2.0
2,5008806,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,1,0,0,0,Security staff,2.0
3,5008808,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0
4,5008809,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0


In [27]:
# Summary statistics restricted to the numeric columns of the application table.
application.describe(include=["number"])

Unnamed: 0,ID,CNT_CHILDREN,AMT_INCOME_TOTAL,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS
count,438557.0,438557.0,438557.0,438557.0,438557.0,438557.0,438557.0,438557.0,438557.0,438557.0
mean,6022176.0,0.42739,187524.3,-15997.904649,60563.675328,1.0,0.206133,0.287771,0.108207,2.194465
std,571637.0,0.724882,110086.9,4185.030007,138767.799647,0.0,0.404527,0.452724,0.310642,0.897207
min,5008804.0,0.0,26100.0,-25201.0,-17531.0,1.0,0.0,0.0,0.0,1.0
25%,5609375.0,0.0,121500.0,-19483.0,-3103.0,1.0,0.0,0.0,0.0,2.0
50%,6047745.0,0.0,160780.5,-15630.0,-1467.0,1.0,0.0,0.0,0.0,2.0
75%,6456971.0,1.0,225000.0,-12514.0,-371.0,1.0,0.0,1.0,0.0,3.0
max,7999952.0,19.0,6750000.0,-7489.0,365243.0,1.0,1.0,1.0,1.0,20.0


In [28]:
# Count / unique / top / freq summary for the object-dtype (text) columns.
application.describe(include=["object"])

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,OCCUPATION_TYPE
count,438557,438557,438557,438557,438557,438557,438557,304354
unique,2,2,2,5,5,5,6,18
top,F,N,Y,Working,Secondary / secondary special,Married,House / apartment,Laborers
freq,294440,275459,304074,226104,301821,299828,393831,78240


### Credit

In [20]:
# Show the (rows, columns) dimensions of the credit history table.
print(f"Credit shape: {credit.shape!s}")

Credit shape: (1048575, 3)


In [4]:
# Preview the first five credit rows to check column names and values.
credit.head(n=5)

Unnamed: 0,ID,MONTHS_BALANCE,STATUS
0,5001711,0,X
1,5001711,-1,0
2,5001711,-2,0
3,5001711,-3,0
4,5001712,0,C


In [29]:
# Summary statistics restricted to the numeric columns of the credit table.
credit.describe(include=["number"])

Unnamed: 0,ID,MONTHS_BALANCE
count,1048575.0,1048575.0
mean,5068286.0,-19.137
std,46150.58,14.0235
min,5001711.0,-60.0
25%,5023644.0,-29.0
50%,5062104.0,-17.0
75%,5113856.0,-7.0
max,5150487.0,0.0


In [30]:
# Count / unique / top / freq summary for the object-dtype (text) columns.
credit.describe(include=["object"])

Unnamed: 0,STATUS
count,1048575
unique,8
top,C
freq,442031
