In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from ydata_profiling import ProfileReport

# Training data that every report in this notebook is built from.
train_set = pd.read_csv("input/train.csv")

# Label column; it is excluded from both feature lists below.
target = "Attrition_Flag"

# Split the remaining columns by dtype: numeric ones on one side, everything
# else (treated as categorical) on the other. Column order is preserved.
numeric_features = [
    name
    for name, dtype in train_set.dtypes.items()
    if name != target and pd.api.types.is_numeric_dtype(dtype)
]
categoric_features = [
    name
    for name, dtype in train_set.dtypes.items()
    if name != target and not pd.api.types.is_numeric_dtype(dtype)
]

In [None]:
# Profile of the numeric features together with a 0/1 encoding of the target,
# so the label can take part in the Kendall correlation enabled below.
train_profile = ProfileReport(
    train_set[numeric_features].assign(
        **{
            # "Attrited Customer" -> 0; every other value (including a missing
            # label) -> 1.
            target: np.where(train_set[target] == "Attrited Customer", 0, 1)
        }
    ),
    title="Análise Exploratória - Features Numéricas",
    # Sections not needed for this analysis are switched off.
    missing_diagrams=None,
    samples=None,
    duplicates=None,
    variables={
        "descriptions": {
            # Keyed by the target column name so it stays in sync with the
            # encoded column added above.
            target: '0 = "Attrited Customer"; 1 = "Existing Customer"'
        }
    },
    # Only the Kendall correlation is computed; all others are disabled.
    correlations={
        "auto": {"calculate": False},
        "pearson": {"calculate": False},
        "spearman": {"calculate": False},
        "kendall": {"calculate": True},
        "phi_k": {"calculate": False},
        "cramers": {"calculate": False},
    },
)
# Make sure the destination folder exists so the export does not fail on a
# fresh checkout after the profiling work has already been done.
Path("output").mkdir(parents=True, exist_ok=True)
train_profile.to_file("output/Análise Exploratória - Features Numéricas.html")

In [None]:
# Profile of the non-numeric features alongside the raw (string) target.
# NOTE: this rebinds `train_profile`, replacing the report built from the
# numeric features.
train_profile = ProfileReport(
    train_set[categoric_features + [target]],
    title="Análise Exploratória - Features Categóricas",
    # Sections not needed for this analysis are switched off.
    missing_diagrams=None,
    samples=None,
    duplicates=None,
    correlations=None,
)
# Make sure the destination folder exists so the export does not fail on a
# fresh checkout after the profiling work has already been done.
Path("output").mkdir(parents=True, exist_ok=True)
train_profile.to_file("output/Análise Exploratória - Features Categóricas.html")

In [None]:
def _profile_for_label(label, title):
    """Build a ProfileReport restricted to the rows whose target equals `label`.

    Parameters
    ----------
    label : str
        Value of the target column selecting the rows to profile.
    title : str
        Title shown at the top of the generated report.

    Returns
    -------
    ProfileReport
        Report over the selected rows with the missing-value diagrams,
        samples, duplicates and correlations sections switched off.
    """
    # Boolean indexing already yields a new frame, so the full training set
    # does not need to be copied beforehand.
    subset = train_set.loc[train_set[target] == label]
    return ProfileReport(
        subset,
        title=title,
        missing_diagrams=None,
        samples=None,
        duplicates=None,
        correlations=None,
    )


# One report per class of the target, then a side-by-side comparison of both.
train_profile_existing = _profile_for_label(
    "Existing Customer", "Análise Exploratória - Treino - Não churn"
)
train_profile_attrited = _profile_for_label(
    "Attrited Customer", "Análise Exploratória - Treino - Churn"
)
comparison_report_train = train_profile_existing.compare(train_profile_attrited)

# Make sure the destination folder exists so the export does not fail on a
# fresh checkout after the profiling work has already been done.
Path("output").mkdir(parents=True, exist_ok=True)
comparison_report_train.to_file(
    "output/Análise Exploratória - Treino - Churn x Não Churn.html"
)