# Diverging Stacked Bar Chart を Altair で作る

## ダミーデータの生成

In [1]:
import pandas as pd
import numpy as np
from numpy.random import default_rng
import altair as alt

In [2]:
# Answer labels of the Likert-style question, ordered from the most negative
# to the most positive answer.
choices = ['反対', 'やや反対', 'どちらでもない', 'やや賛成', '賛成']

# Midpoint of the 1-based answer codes. With an odd number of choices this is
# the integer code of the neutral answer; with an even number it is a
# half-integer that matches no answer code.
mid_value = (len(choices) + 1) // 2 if len(choices) % 2 else (len(choices) + 1) / 2

In [3]:
# generate dummy data

def generate_dummy_data_per_group(n, mu, sigma, bins, rng=None):
    """Draw ``n`` normally distributed samples and bin them into answer codes.

    Parameters
    ----------
    n : int
        Number of responses to generate.
    mu, sigma : float
        Mean and standard deviation of the normal distribution sampled from.
    bins : array-like
        Bin edges passed to ``np.digitize``; each sample is replaced by the
        index of the bin it falls into.
    rng : numpy.random.Generator, optional
        Generator to draw from. When omitted a fresh, unseeded generator is
        created; pass a seeded one to get reproducible data.

    Returns
    -------
    pandas.DataFrame
        One row per response with a single column ``value`` holding the
        bin index of that response.
    """
    if rng is None:
        rng = default_rng()
    samples = rng.normal(mu, sigma, size=n)
    return pd.DataFrame({'value': np.digitize(samples, bins=bins)})

def generate_dummy_data(n_groups=5, choices=choices):
    """Generate dummy survey responses for several respondent groups.

    Parameters
    ----------
    n_groups : int
        Number of groups to generate. Groups are labelled ``0 .. n_groups-1``
        in the ``attr`` column.
    choices : sequence
        Answer labels; only its length is used, to decide how many answer
        bins the samples are split into.

    Returns
    -------
    pandas.DataFrame
        One row per response with columns ``value`` (bin index of the
        answer) and ``attr`` (group number), on a fresh 0-based index.
        Empty (but with both columns) when ``n_groups`` is not positive.
    """
    # Equal-width edges over [0, 1]; the outermost edges are opened up to
    # +/-inf so that every sample lands in one of the len(choices) bins.
    bins = np.linspace(0, 1, len(choices) + 1)
    bins[0] = -np.inf
    bins[-1] = np.inf

    rng = default_rng()
    frames = []
    for group in range(n_groups):
        # Each group gets its own size and its own mean, drawn at random.
        frame = generate_dummy_data_per_group(
            n=int(rng.uniform(100, 200)),
            mu=rng.normal(0.5, 0.2),
            sigma=0.5,
            bins=bins,
        )
        frame['attr'] = group
        frames.append(frame)

    if not frames:
        # pd.concat refuses an empty list, so build the empty result by hand.
        return pd.DataFrame(columns=['value', 'attr'])

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

def aggregate_dummy_data(data):
    """Turn raw responses into per-group answer proportions.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least the columns ``attr`` (group) and ``value``
        (answer code), one row per response.

    Returns
    -------
    pandas.DataFrame
        Columns ``attr``, ``value`` and ``proportion``; ``proportion`` is the
        count of each (attr, value) pair divided by the total count of its
        ``attr`` group. Pairs that never occur produce no row.
    """
    # Drop the incoming index so it cannot clash with the grouping columns.
    counts = data.reset_index(drop=True).groupby(['attr', 'value']).size()

    # transform('sum') keeps the (attr, value) index intact. A groupby.apply
    # here would prepend the group key again on pandas >= 2.0, leaving two
    # 'attr' index levels and making reset_index() fail.
    totals = counts.groupby(level='attr').transform('sum')

    return (counts / totals).rename('proportion').reset_index()

# Raw responses first, then the per-group answer proportions. The proportion
# column is renamed to 'x', the name the plotting code works with.
data_nonagg = generate_dummy_data()
data = aggregate_dummy_data(data_nonagg).rename(columns={'proportion': 'x'})

# プロット

## 準備：データを選択肢の正／負／中間に分割

In [4]:
# Split the proportions around the midpoint code: answers below it are the
# negative side, answers above it the positive side, and an exact match is
# the neutral answer.
neg = data.loc[data['value'].lt(mid_value)].copy()
pos = data.loc[data['value'].gt(mid_value)].copy()
mid = data.loc[data['value'].eq(mid_value)].copy()

# Neutral answer with half of its proportion, so that it can be stacked once
# on the positive side and once on the negative side.
mid_half = mid.assign(x=mid['x'] * .5)

In [5]:
def to_cumsum_interval(df, negative=False, strong_first=False):
    """Convert per-answer proportions into stacked bar intervals.

    Within each ``attr`` group the ``x`` values are accumulated in answer
    order, giving every row an interval from ``x2`` (running total before the
    row, 0 for the first row of a group) to ``x`` (running total including
    the row).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``attr``, ``value`` and ``x``.
    negative : bool
        When true, both interval ends are multiplied by -1 so the bars grow
        towards the negative side of the axis.
    strong_first : bool
        Together with ``negative`` this selects the stacking order: rows are
        sorted by (attr, value) ascending when exactly one of ``negative``
        and ``not strong_first`` holds, descending otherwise.

    Returns
    -------
    pandas.DataFrame
        Columns ``attr``, ``value``, ``x`` and ``x2`` in the sorted order.
    """
    ascending = (not strong_first) != negative  # xor
    ordered = df.set_index(['attr', 'value']).sort_index(ascending=ascending)

    # Outer end of every bar: running total inside each attr group.
    outer = ordered.groupby(level=0).cumsum()
    # Inner end: the previous row's running total, starting each group at 0.
    inner = outer.groupby(level=0).shift().fillna(0).rename(columns={'x': 'x2'})

    intervals = outer.join(inner).reset_index()
    if negative:
        for column in ('x', 'x2'):
            intervals[column] = intervals[column] * -1
    return intervals

## 1. よくある diverging stacked bar chart

In [6]:
# Half of the neutral answer is stacked with each side. With the default
# ordering it is accumulated first, so it sits next to zero on both sides.
neg_i = to_cumsum_interval(pd.concat([neg, mid_half]), negative=True)
pos_i = to_cumsum_interval(pd.concat([pos, mid_half]))

# Answer codes are 1-based, hence the -1 when looking up the label.
alt_data = pd.concat([pos_i, neg_i]).assign(
    value_label=lambda frame: frame['value'].map(lambda code: choices[code - 1])
)

In [7]:
# One bar segment per (attr, answer), drawn from x2 to x; colours follow the
# order of `choices` on the red-blue scheme.
diverging_chart = alt.Chart(alt_data).mark_bar().encode(
    x=alt.X('x', axis=alt.Axis(title='Percentage', format='%')),
    x2=alt.X2('x2'),
    y=alt.Y('attr', type='ordinal'),
    color=alt.Color(
        'value_label',
        scale=alt.Scale(domain=choices, scheme='redblue'),
    ),
)

diverging_chart

## 2. 強い意見を中央に集めたプロット

In [8]:
# Same inputs as the plain diverging chart, but with strong_first=True the
# stacking order is reversed on both sides.
neg_i = to_cumsum_interval(
    pd.concat([neg, mid_half]),
    negative=True,
    strong_first=True,
)
pos_i = to_cumsum_interval(pd.concat([pos, mid_half]), strong_first=True)

# Answer codes are 1-based, hence the -1 when looking up the label.
alt_data = pd.concat([pos_i, neg_i]).assign(
    value_label=lambda frame: frame['value'].map(lambda code: choices[code - 1])
)

In [9]:
# Same encoding as the plain diverging chart; only the interval data differs.
alt.Chart(alt_data).mark_bar().encode(
    x=alt.X('x', axis=alt.Axis(title='Percentage', format='%')),
    x2=alt.X2('x2'),
    y=alt.Y('attr', type='ordinal'),
    color=alt.Color(
        'value_label',
        scale=alt.Scale(domain=choices, scheme='redblue'),
    ),
)

## 3. 中間の選択肢を分離したプロット

In [10]:
# The neutral answer is no longer split across the two sides: positive and
# negative answers are stacked on their own, the neutral one separately.
neg_i = to_cumsum_interval(neg, negative=True, strong_first=True)
pos_i = to_cumsum_interval(pos, strong_first=True)

# Answer codes are 1-based, hence the -1 when looking up the label.
alt_data = pd.concat([pos_i, neg_i]).assign(
    value_label=lambda frame: frame['value'].map(lambda code: choices[code - 1])
)
mid_i = to_cumsum_interval(mid).assign(
    value_label=lambda frame: frame['value'].map(lambda code: choices[code - 1])
)

In [11]:
# Encodings shared by both panels.
x = alt.X('x', axis=alt.Axis(title='Percentage', format='%'))
color = alt.Color(
    'value_label',
    scale=alt.Scale(domain=choices, scheme='redblue'),
)

# Left panel: positive and negative answers as intervals from x2 to x.
pn_chart = alt.Chart(alt_data).mark_bar().encode(
    x=x,
    x2=alt.X2('x2'),
    y=alt.Y('attr', type='ordinal'),
    color=color,
).properties(width=420)

# Right panel: the neutral answer only, without a y axis of its own.
mid_chart = alt.Chart(mid_i).mark_bar().encode(
    x=x,
    y=alt.Y('attr', type='ordinal', axis=None),
    color=color,
).properties(width=60)

# Put the panels side by side with no gap between them.
alt.hconcat(pn_chart, mid_chart).configure_concat(spacing=0)

## 参考. 100%積み上げ棒グラフ

In [12]:
# Work on a copy of the proportions with a label column added; answer codes
# are 1-based, hence the -1 when looking up the label.
alt_data = data.assign(
    value_text=data['value'].map(lambda code: choices[code - 1])
)

# Plain stacked bars; segments are stacked in ascending answer-code order.
alt.Chart(alt_data).mark_bar().encode(
    x=alt.X('x', axis=alt.Axis(title='Percentage', format='%')),
    y=alt.Y('attr', type='ordinal'),
    color=alt.Color(
        'value_text',
        scale=alt.Scale(domain=choices, scheme='redblue'),
    ),
    order=alt.Order('value', sort='ascending'),
)