In [1]:
#import helpsk as hlp
#import matplotlib.pyplot as plt
import plotly_express as px
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml
#from vega_datasets import data
# data.list_datasets()
# import seaborn as sns

# %matplotlib inline

In [2]:
from typing import Optional

def plotly_title(title: str, subtitle: Optional[str] = None) -> str:
    """
    Build the title string for a plotly figure.

    Args:
        title: main title text.
        subtitle: optional text placed on a second line inside a `<sup>` tag; when it is
            `None` or empty, `title` is returned unchanged.

    Returns:
        `title` alone, or `title` followed by `<br>` and the `<sup>`-wrapped subtitle.
    """
    if not subtitle:
        return title
    return f'{title}<br><sup>{subtitle}</sup>'

def px_log_10_axis(fig, axis='x', min_value=-10, max_value=20, step=1):
    """
    Relabel the ticks of one axis of a plotly-express figure whose data has already been
    log10-transformed, so that a tick at position `p` is labelled with `10 ** p`.

    Args:
        fig: figure to modify; its `update_layout` method is called.
        axis: `'x'` updates the x-axis; any other value updates the y-axis.
        min_value: first tick position (an exponent of 10), inclusive.
        max_value: end of the tick positions, exclusive.
        step: spacing between consecutive tick positions.

    Returns:
        The same `fig` object that was passed in.

    Example:

    ```
    fig = px.histogram(
        np.log10(values),
        title="Histogram of 'values' (Log10)"
    )
    px_log_10_axis(fig, axis='x')
    ```
    """
    exponents = list(range(min_value, max_value, step))
    tick_settings = {
        'tickvals': exponents,
        'ticktext': [10 ** exponent for exponent in exponents],
    }
    # Everything except 'x' is routed to the y-axis.
    layout_key = 'xaxis' if axis == 'x' else 'yaxis'
    fig.update_layout(**{layout_key: tick_settings})
    return fig


def px_3d_log_10_axis(fig, axis='x', min_value=-10, max_value=20, step=1):
    """
    Relabel the ticks of one scene axis of a plotly-express 3D figure whose data has already
    been log10-transformed, so that a tick at position `p` is labelled with `10 ** p`.

    Args:
        fig: figure to modify; its `update_scenes` method is called.
        axis: `'x'` or `'y'` select that axis; any other value updates the z-axis.
        min_value: first tick position (an exponent of 10), inclusive.
        max_value: end of the tick positions, exclusive.
        step: spacing between consecutive tick positions.

    Returns:
        The same `fig` object that was passed in.

    Example:

    ```
    fig = px.scatter_3d(
        np.log10(values_df),
        x='a', y='b', z='c',
    )
    px_3d_log_10_axis(fig, axis='z')
    ```
    """
    exponents = list(range(min_value, max_value, step))
    tick_settings = {
        'tickvals': exponents,
        'ticktext': [10 ** exponent for exponent in exponents],
    }
    # 'x' and 'y' map to their own axes; every other value falls through to the z-axis.
    scene_key = 'xaxis' if axis == 'x' else ('yaxis' if axis == 'y' else 'zaxis')
    fig.update_scenes(**{scene_key: tick_settings})
    return fig


In [3]:
# Source: https://www.openml.org/d/31
# `as_frame=True` makes the DataFrame return type explicit rather than relying on the default.
credit_g = fetch_openml('credit-g', version=1, as_frame=True)
# Work on a copy so adding the target column does not modify the frame held by `credit_g`
# (assigning into that frame directly may trigger a pandas SettingWithCopyWarning).
credit_data = credit_g['data'].copy()
credit_data['target'] = credit_g['target']
credit_data.shape

  warn(


(1000, 21)

In [4]:
# Preview the first rows of the loaded data (features plus the appended `target` column).
credit_data.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,target
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,...,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes,good
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,...,real estate,22.0,none,own,1.0,skilled,1.0,none,yes,bad
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,...,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes,good
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,...,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes,good
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,...,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes,bad


In [5]:
# Bar chart of row counts per (credit_history, target) pair, coloured by target.
# After `reset_index()` the count column is named 0, which is why `y` and `text` refer to 0.
history_target_counts = (
    credit_data
    .groupby(['credit_history', 'target'])
    .size()
    .reset_index()
)
bar_title = plotly_title(title="Main Title", subtitle="Subtitle")
bar_labels = {'arr_difference': "ARR VARIANCE", '0': '# of Opps'}

fig = px.bar(
    history_target_counts,
    x='credit_history',
    y=0,
    color='target',
    text=0,
    title=bar_title,
    labels=bar_labels,
)
fig.update_traces(textposition="inside")

---

In [6]:
from helpsk.string import format_number

x_var = 'credit_history'
color_var = 'target'
y_var = 'Count'

# Row counts per `x_var` category; these become the black points layered over the bars.
df = credit_data.groupby(x_var).size().reset_index(name=y_var)
# The grand total is the same for every row, so compute it once instead of re-summing the
# column inside the row-wise lambda.
overall_total = df[y_var].sum()
df['__Text'] = df.apply(
    lambda row: (
        f"{format_number(row[y_var], places=0)} "
        f"({round((row[y_var] / overall_total * 100), 1)}%)"
    ),
    axis=1,
)

# Scatter trace of the per-category totals; `color_discrete_map="identity"` makes plotly use
# the literal 'black' values as the marker colour.
fig = px.scatter(
    df,
    x=x_var,
    y=y_var,
    color=['black'] * len(df),
    color_discrete_map="identity",
    text='__Text',
)
fig = fig.update_traces(textposition='top center')

# Row counts per (x_var, color_var) pair, labelled with each pair's share of its x_var group.
df_2 = credit_data.groupby([x_var, color_var]).size().reset_index(name=y_var)
# Pass the aggregation by name: handing the builtin `sum` to `transform` is deprecated in
# recent pandas releases.
df_2['__group_total'] = df_2.groupby(x_var)[y_var].transform('sum')
df_2['__Text'] = df_2.apply(
    lambda row: (
        f"{format_number(row[y_var], places=0)} "
        f"({round((row[y_var] / row['__group_total'] * 100), 1)}%)"
    ),
    axis=1,
)

fig2 = px.bar(
    df_2,
    x=x_var,
    y=y_var,
    color=color_var,
    barmode='group',
    text='__Text',
    title=plotly_title(
        title="Example",
        subtitle="Points represent group total."
    ),
)
fig2.update_traces(textposition="auto")
# Overlay the totals scatter trace on the grouped bar chart.
fig2.add_trace(fig.data[0])
fig2

---

# Log Axis

In [7]:
# Display the frame; the rendered output below is abbreviated with `...` rows and columns.
credit_data

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,target
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,...,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes,good
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,...,real estate,22.0,none,own,1.0,skilled,1.0,none,yes,bad
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,...,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes,good
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,...,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes,good
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,...,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes,bad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,no checking,12.0,existing paid,furniture/equipment,1736.0,<100,4<=X<7,3.0,female div/dep/mar,none,...,real estate,31.0,none,own,1.0,unskilled resident,1.0,none,yes,good
996,<0,30.0,existing paid,used car,3857.0,<100,1<=X<4,4.0,male div/sep,none,...,life insurance,40.0,none,own,1.0,high qualif/self emp/mgmt,1.0,yes,yes,good
997,no checking,12.0,existing paid,radio/tv,804.0,<100,>=7,4.0,male single,none,...,car,38.0,none,own,1.0,skilled,1.0,none,yes,good
998,<0,45.0,existing paid,radio/tv,1845.0,<100,1<=X<4,4.0,male single,none,...,no known property,23.0,none,for free,1.0,skilled,1.0,yes,yes,bad


In [8]:
# New frame holding log10 of the three numeric columns; `credit_data` itself is not modified.
_log_10 = credit_data[['duration', 'credit_amount', 'age']].apply(np.log10)
_log_10


Unnamed: 0,duration,credit_amount,age
0,0.778151,3.067815,1.826075
1,1.681241,3.774590,1.342423
2,1.079181,3.321391,1.690196
3,1.623249,3.896636,1.653213
4,1.380211,3.687529,1.724276
...,...,...,...
995,1.079181,3.239550,1.491362
996,1.477121,3.586250,1.602060
997,1.079181,2.905256,1.579784
998,1.653213,3.265996,1.361728


In [9]:
# Raw values on both axes; plotly applies the log scaling via `log_x` / `log_y`.
px.scatter(credit_data, x='duration', y='credit_amount', log_x=True, log_y=True)

In [10]:
# Plot the already log10-transformed columns on linear axes, then relabel the ticks of both
# axes as powers of ten with the helper.
fig = px.scatter(_log_10, x='duration', y='credit_amount')
for axis_name in ('x', 'y'):
    px_log_10_axis(fig, axis=axis_name)
fig

---

In [13]:
# Preview the first rows again before building the 3D scatter plot.
credit_data.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,target
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,...,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes,good
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,...,real estate,22.0,none,own,1.0,skilled,1.0,none,yes,bad
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,...,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes,good
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,...,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes,good
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,...,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes,bad


In [22]:
# 3D scatter of duration / credit_amount / age, sized by num_dependents and coloured by
# housing. The x and y axes are log-scaled; z is left linear.
housing_colors = {
    'for free': 'red',
    'rent': 'blue',
    'own': 'green',
}
fig = px.scatter_3d(
    credit_data,
    x='duration',
    y='credit_amount',
    z='age',
    log_x=True,
    log_y=True,
    size='num_dependents',
    color='housing',
    category_orders={'housing': ['for free', 'rent', 'own']},
    color_discrete_map=housing_colors,
    hover_data={'checking_status': True},
    opacity=0.3,
    width=700,
    height=700,
)
# Tight margins, and the legend anchored at the top-left of the figure.
fig.update_layout(
    margin={'l': 5, 'r': 5, 't': 20, 'b': 5},
    legend={'yanchor': "top", 'y': 0.99, 'xanchor': "left", 'x': 0.01},
)
fig

---