In [1]:
import re
import json
import time as time
import numpy as np
import pandas as pd
from functools import partial
from datetime import date, timedelta

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import plotly.figure_factory as ff
import plotly.express as px
import plotly.offline as pyo

## Own specific functions 
from functions import *

In [2]:
def calculate_years(days, reference_date=None):
    """
    Convert a day offset into a number of whole years elapsed.

    The offset is counted backwards from the reference date. Its sign is
    ignored, so ``-9461`` and ``9461`` give the same result.

    Parameters:
    -----------------
        days (int): Number of days to subtract from the reference date
        reference_date (datetime.date, optional): Date to count back from.
            Defaults to the current date; pass a fixed date to get
            reproducible results.

    Returns:
    -----------------
        years (int): Number of full years between the computed date and
            the reference date
    """

    if reference_date is None:
        reference_date = date.today()

    initial_date = reference_date - timedelta(abs(days))

    # The comparison yields a bool that is subtracted as 0/1: one year is
    # taken off when the (month, day) of the reference date is still before
    # the (month, day) of the computed date, i.e. the last year is incomplete.
    anniversary_pending = (reference_date.month, reference_date.day) < (initial_date.month, initial_date.day)
    years = reference_date.year - initial_date.year - anniversary_pending

    return years

In [3]:
# Subset of columns to load from the CSV (used as `usecols` when reading)
COLUMNS = [
    "SK_ID_CURR",
    "CODE_GENDER",
    "DAYS_BIRTH",
    "DAYS_EMPLOYED",
    "CNT_CHILDREN",
    "FLAG_OWN_REALTY",
    "FLAG_OWN_CAR",
    "AMT_INCOME_TOTAL",
    "AMT_CREDIT",
    "TARGET",
]

In [4]:
# Load the customers-to-predict dataset (all columns)
df_clients_to_predict = pd.read_csv(
    filepath_or_buffer="datasets/df_customers_to_predict.csv",
)

In [5]:
# Load the optimized dataset, restricted to the columns listed in COLUMNS
df_optimized = pd.read_csv(
    filepath_or_buffer="datasets/df_optimized.csv",
    usecols=COLUMNS,
    low_memory=True,
)

In [6]:
# Derive AGE (whole years) from the DAYS_BIRTH day offset, row by row
df_optimized["AGE"] = df_optimized["DAYS_BIRTH"].apply(calculate_years)

In [7]:
# Preview the first five rows of the loaded frame
df_optimized.head(n=5)

Unnamed: 0,SK_ID_CURR,TARGET,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,DAYS_BIRTH,DAYS_EMPLOYED,AGE
0,100002,1.0,0,0,0,0,202500.0,406597.5,-9461,-637.0,25
1,100003,0.0,1,0,1,0,270000.0,1293502.5,-16765,-1188.0,45
2,100004,0.0,0,1,0,0,67500.0,135000.0,-19046,-225.0,52
3,100006,0.0,1,0,0,0,135000.0,312682.5,-19005,-3039.0,52
4,100007,0.0,0,0,0,0,121500.0,513000.0,-19932,-3038.0,54


In [8]:
# Id (SK_ID_CURR) of the client to look up
SELECTED_CLIENT_ID = 100013

# Take an explicit copy of the filtered rows: assigning a new column on a
# boolean-filtered slice raises SettingWithCopyWarning and may or may not
# write through to the parent frame.
df_customer_selected = df_clients_to_predict.loc[
    df_clients_to_predict["SK_ID_CURR"] == SELECTED_CLIENT_ID
].copy()
# Derive AGE (whole years) from the DAYS_BIRTH day offset
df_customer_selected["AGE"] = df_customer_selected["DAYS_BIRTH"].apply(calculate_years)
df_customer_selected

Unnamed: 0,SK_ID_CURR,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,...,CC_NAME_CONTRACT_STATUS_Signed_MEAN,CC_NAME_CONTRACT_STATUS_Signed_SUM,CC_NAME_CONTRACT_STATUS_Signed_VAR,CC_NAME_CONTRACT_STATUS_nan_MIN,CC_NAME_CONTRACT_STATUS_nan_MAX,CC_NAME_CONTRACT_STATUS_nan_MEAN,CC_NAME_CONTRACT_STATUS_nan_SUM,CC_NAME_CONTRACT_STATUS_nan_VAR,CC_COUNT,AGE
2,100013,0,1,0,0,202500.0,663264.0,69777.0,630000.0,0.019101,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,96.0,54


In [24]:
# Age of the first (selected) row as a scalar
df_customer_selected["AGE"].iloc[0]

54

In [13]:
# Split the rows on the TARGET flag: 0 goes to the "repaid" frame, 1 to the "not repaid" frame
target_flag = df_optimized["TARGET"]
df_optimized_by_target_repaid = df_optimized.loc[target_flag == 0]
df_optimized_by_target_not_repaid = df_optimized.loc[target_flag == 1]

In [15]:
def _count_clients_by_age(frame):
    """
    Count the rows of ``frame`` per distinct value of its "AGE" column.

    Parameters:
    -----------------
        frame (pd.DataFrame): Frame containing an "AGE" column

    Returns:
    -----------------
        counts (dict): Mapping ``{age: number_of_rows}``, ages in ascending order
    """
    # groupby sorts the group keys, and the resulting Series converts straight
    # to a dict; no intermediate DataFrame / rename / set_index is needed.
    return frame.groupby("AGE").size().to_dict()


ages_data_repaid = _count_clients_by_age(df_optimized_by_target_repaid)
ages_data_not_repaid = _count_clients_by_age(df_optimized_by_target_not_repaid)

In [16]:
def _expand_age_counts(age_counts):
    """Turn an ``{age: amount}`` mapping into a flat list holding each age ``amount`` times."""
    expanded = []
    for age, amount in age_counts.items():
        expanded.extend(age for _ in range(amount))
    return expanded


ages_data_repaid_list = _expand_age_counts(ages_data_repaid)
ages_data_not_repaid_list = _expand_age_counts(ages_data_not_repaid)

In [29]:
# Options handed to plotly when the figure is written out
config = dict(displayModeBar=False, displaylogo=False)

group_labels = ["Repaid", "Not repaid"]

# Density curves only (no histogram bars, no rug), one curve per group
fig_ages = ff.create_distplot(
    hist_data=[ages_data_repaid_list, ages_data_not_repaid_list],
    group_labels=group_labels,
    show_hist=False,
    show_rug=False,
    colors=["Green", "Red"],
)

# Fixed-size figure with a centred title and labelled axes
fig_ages.update_layout(
    paper_bgcolor="white",
    font=dict(family="sans serif"),
    autosize=False,
    width=500,
    height=360,
    margin={"l": 50, "r": 50, "b": 0, "t": 20, "pad": 0},
    title=dict(
        text="Client age vs Current clients",
        y=1,
        x=0.45,
        xanchor="center",
        yanchor="top",
    ),
    xaxis_title="Ages",
    yaxis_title="Density",
    legend=dict(traceorder="normal"),
)

# Dashed vertical marker at the age of the selected client
fig_ages.add_vline(
    x=df_customer_selected["AGE"].values[0],
    line_width=3,
    line_dash="dash",
    line_color="green",
    annotation_text="client's age",
)

# Writes the figure to an HTML file; the returned file name is the cell output
pyo.plot(fig_ages, config=config, filename="example2.html")

'example2.html'