In [35]:
import json
import pandas as pd

from plotly import graph_objects as go
from plotly import express as px
from plotly.graph_objs.layout import Template
from plotly.express import colors as pcolors
from plotly import data as pdata

# Load the "tips" sample dataset that ships with plotly (plotly.data) as a DataFrame.
df = pdata.tips()
# Bare last expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [49]:
%timeit
# curried function
def calc_deviation_val(col_name: str):
    """Build a curried deviation-value calculator for one column.

    The computation is split into three calls:

    1. ``calc_deviation_val(col_name)`` fixes the column name and returns a
       function that expects a DataFrame.
    2. Calling that function with a DataFrame computes the column's mean and
       its population standard deviation (``ddof=0``) and returns a
       transformer.
    3. The transformer maps a value ``v`` to ``(v - mean) / std * 10 + 50``,
       so the column mean itself maps to 50.

    A zero standard deviation is not special-cased; the division is simply
    carried out.

    Args:
        col_name: Name of the column whose statistics are used.

    Returns:
        A function ``DataFrame -> (value -> deviation value)``.
    """
    def fit(df: pd.DataFrame):
        column = df[col_name]
        mean_val: float = column.mean()
        std_val: float = column.std(ddof=0)

        def to_deviation(ser):
            # Standardise, then rescale to mean 50 / spread 10.
            return (ser - mean_val) / std_val * 10 + 50

        return to_deviation

    return fit

def execute_calc_deviation_val(df: pd.DataFrame, calc_input_colname: str) -> pd.Series:
    """Return the deviation value of every row of one column.

    The mean and population standard deviation are taken from ``df`` itself,
    so passing a filtered frame yields scores relative to that subset only.

    Args:
        df: Frame that supplies both the statistics and the values to score.
        calc_input_colname: Name of the numeric column to score.

    Returns:
        A Series with the same index and name as ``df[calc_input_colname]``
        holding ``(value - mean) / std * 10 + 50`` for each row.
    """
    to_deviation = calc_deviation_val(calc_input_colname)(df)
    # The transformer is plain arithmetic, so it is applied to the whole
    # column in one vectorised step instead of once per element via Series.map.
    return to_deviation(df[calc_input_colname])

# Deviation values are computed within each sex separately, so a score of 50
# means "the average total_bill for that sex", not for the whole table.
# rename() returns a new Series, so no in-place mutation is needed.
ser_dev_female = (
    execute_calc_deviation_val(df[df['sex'] == 'Female'], 'total_bill')
    .rename('DeviationValue_by_sex')
)
ser_dev_male = (
    execute_calc_deviation_val(df[df['sex'] == 'Male'], 'total_bill')
    .rename('DeviationValue_by_sex')
)

# Boolean filtering keeps the original row labels, so the stacked Series
# aligns back onto df by index when assigned as a column.
ser_dev = pd.concat([ser_dev_male, ser_dev_female], axis=0)
df['DeviationValue_by_sex'] = ser_dev

# Strip (swarm-style) plot of the per-sex deviation values, coloured by day.
fig = px.strip(
    df,
    x="sex",
    y="DeviationValue_by_sex",
    color="day",
    title="Total Bill Distribution by Day and Gender")
fig.show()


In [41]:
# Strip (swarm-style) plot of the raw total_bill per sex, coloured by day.
fig = px.strip(
    data_frame=df,
    x="sex",
    y="total_bill",
    color="day",
    title="Total Bill Distribution by Day and Gender",
)
fig.show()


In [51]:
import math
import numpy as np

def count_by_bin(df: pd.DataFrame, calc_target_col: str, bins: list[float], grouping_col: str) -> pd.DataFrame:
    """Count rows per bin, overall and for every value of a grouping column.

    Bins are left-closed / right-open (``[lo, hi)``). Values that fall outside
    the supplied edges are not counted in any bin.

    Args:
        df: Source data.
        calc_target_col: Numeric column to bin.
        bins: Bin edges passed to ``pd.cut``.
        grouping_col: Column whose distinct values each get their own count
            column. Rows with a missing group label get no column of their
            own but are still included in ``'all'``.

    Returns:
        A DataFrame indexed by bin interval with an ``'all'`` column followed
        by one column per group label, in sorted label order.
    """
    # Bin once and reuse the result: every column then shares exactly the same
    # categories, and the cut is not recomputed for each group.
    binned = pd.cut(df[calc_target_col], bins=bins, right=False)

    # Missing labels cannot be ordered against real labels by np.sort, so they
    # are dropped before sorting.
    group_labels = np.sort(df[grouping_col].dropna().unique()).tolist()

    counts = [binned.value_counts().sort_index()]
    counts.extend(
        binned[df[grouping_col] == label].value_counts().sort_index()
        for label in group_labels
    )

    # A single concat with explicit keys names the columns directly instead of
    # growing the frame inside a loop and renaming afterwards.
    return pd.concat(counts, axis=1, keys=['all'] + group_labels)


calc_target_col = 'total_bill'
# Named col_min / col_max rather than min / max so the Python builtins are not
# shadowed for the rest of the kernel session.
col_min = df[calc_target_col].min()
col_max = df[calc_target_col].max()
interval = 5
# Edges start at floor(min) and advance by `interval`. Bins are left-closed
# ([lo, hi)), so the last edge must be strictly greater than the maximum;
# stopping at floor(max) + 1 + interval guarantees that even when the maximum
# is an integer that lands exactly on an edge.
bins = [
    round(x, 3)
    for x in np.arange(math.floor(col_min), math.floor(col_max) + 1 + interval, interval)
]

# One count table per grouping column.
for grouping_col in ('sex', 'day'):
    display(count_by_bin(df, calc_target_col, bins, grouping_col))

Unnamed: 0_level_0,all,Female,Male
total_bill,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"[3, 8)",7,3,4
"[8, 13)",47,21,26
"[13, 18)",73,30,43
"[18, 23)",48,14,34
"[23, 28)",28,9,19
"[28, 33)",20,4,16
"[33, 38)",7,4,3
"[38, 43)",7,0,7
"[43, 48)",3,2,1
"[48, 53)",4,0,4


Unnamed: 0_level_0,all,Fri,Sat,Sun,Thur
total_bill,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"[3, 8)",7,1,3,1,2
"[8, 13)",47,6,15,10,16
"[13, 18)",73,6,25,22,20
"[18, 23)",48,3,19,13,13
"[23, 28)",28,1,10,13,4
"[28, 33)",20,1,7,9,3
"[33, 38)",7,0,1,4,2
"[38, 43)",7,1,3,2,1
"[43, 48)",3,0,1,1,1
"[48, 53)",4,0,3,1,0


In [52]:
class Binning:
    """Holds a set of bin edges together with the Series they are meant to bin.

    NOTE(review): work in progress. ``count_by_bin`` does not read the stored
    ``bins`` / ``ser_for_binning`` yet; it takes everything as arguments.
    """

    def __init__(self, bins: list[float], ser_for_binning: pd.Series):
        # Stored only; no method of this class reads them so far.
        self.bins: list[float] = bins
        self.ser_for_binning: pd.Series = ser_for_binning

    # HACK, TODO: still a copy of the free-standing count_by_bin and does not
    # use instance state. Declared static so it can be called on the class or
    # on an instance without the instance being bound to ``df``.
    @staticmethod
    def count_by_bin(df: pd.DataFrame, calc_target_col: str, bins: list[float], grouping_col: str) -> pd.DataFrame:
        """Count rows per bin, overall and for every value of a grouping column.

        Bins are left-closed / right-open (``[lo, hi)``). Values that fall
        outside the supplied edges are not counted in any bin.

        Args:
            df: Source data.
            calc_target_col: Numeric column to bin.
            bins: Bin edges passed to ``pd.cut``.
            grouping_col: Column whose distinct values each get their own
                count column. Rows with a missing group label get no column
                of their own but are still included in ``'all'``.

        Returns:
            A DataFrame indexed by bin interval with an ``'all'`` column
            followed by one column per group label, in sorted label order.
        """
        # Bin once and reuse the result so every column shares the same
        # categories and the cut is not recomputed per group.
        binned = pd.cut(df[calc_target_col], bins=bins, right=False)

        # Missing labels cannot be ordered against real labels by np.sort.
        group_labels = np.sort(df[grouping_col].dropna().unique()).tolist()

        counts = [binned.value_counts().sort_index()]
        counts.extend(
            binned[df[grouping_col] == label].value_counts().sort_index()
            for label in group_labels
        )

        # Single concat with explicit keys instead of growing the frame in a
        # loop and renaming the columns afterwards.
        return pd.concat(counts, axis=1, keys=['all'] + group_labels)
    
