<a href="https://colab.research.google.com/github/o93/bayesian-ab/blob/main/BeyesianAB_PyMC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio japanize_matplotlib

Collecting gradio
  Downloading gradio-3.44.3-py3-none-any.whl (20.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.2/20.2 MB[0m [31m66.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting japanize_matplotlib
  Downloading japanize-matplotlib-1.1.3.tar.gz (4.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.1/4.1 MB[0m [31m97.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.103.1-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.2/66.2 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.1.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==0.5.0 (from gradio)
  Downloading gradio_client-0.5.0-py3-none-any.w

In [None]:
from pydantic.dataclasses import dataclass

import io
import gradio as gr
import pymc as pm
import arviz as az
import traceback

import matplotlib.pyplot as plt
import seaborn as sns
import japanize_matplotlib
from PIL import Image
from matplotlib.patches import Patch

import numpy as np
import pandas as pd


@dataclass
class Pattern:
    """One variant (one row of the input form) taking part in the comparison."""
    # Display name; used as the legend label and inside the comparison titles.
    name: str
    # Alpha parameter of this pattern's Beta prior.
    alpha: float
    # Beta parameter of this pattern's Beta prior.
    beta: float
    # Number of trials; passed as ``n`` to the Binomial likelihood.
    trials: int
    # Observed number of successes; passed as ``observed`` to the Binomial.
    successes: int

@dataclass
class Comb:
    """A pair of patterns to compare, referenced by position in the pattern list."""
    # Name under which the ratio is registered in the model and read back
    # from the trace (e.g. '1_0').
    key: str
    # Index of the pattern used as the numerator of the ratio.
    challenger: int
    # Index of the pattern used as the denominator of the ratio (the baseline).
    champion: int

def convert_params(params):
    """Split the flat sequence of UI input values into typed settings.

    ``params`` is laid out as consecutive groups of five values
    (name, alpha, beta, trials, successes), one group per pattern row,
    followed by the threshold and then the sample size.

    Args:
        params: Flat sequence of input values in the layout described above.

    Returns:
        A ``(patterns, threshold, sample_size)`` tuple. ``patterns`` holds one
        ``Pattern`` per leading group, stopping at the first group whose name
        is an empty string; ``sample_size`` is converted with ``int``.
    """
    fields_per_pattern = 5
    # Index of the first value after the last complete group of five.
    tail_start = (len(params) // fields_per_pattern) * fields_per_pattern

    patterns = []
    offset = 0
    # An empty name marks the end of the rows that are actually in use.
    while offset < tail_start and params[offset] != '':
        patterns.append(Pattern(*params[offset:offset + fields_per_pattern]))
        offset += fields_per_pattern

    threshold, sample_size = params[tail_start], int(params[tail_start + 1])
    return patterns, threshold, sample_size

def make_trace(patterns, threshold, sample_size):
    """Build the Beta-Binomial model for the patterns and sample from it.

    Each pattern gets a Beta prior on its success rate and a Binomial
    likelihood for its observed successes. For every pair of patterns the
    ratio ``rate[challenger] / rate[champion]`` is recorded under the pair's
    key so it can be read back from the trace.

    Args:
        patterns: Sequence of ``Pattern`` objects; at least two are required.
        threshold: Unused here; kept so all pipeline steps share a signature.
        sample_size: Number of draws passed to ``pm.sample``.

    Returns:
        A ``(trace, combs)`` tuple: the result of ``pm.sample`` and the list
        of ``Comb`` pairs whose ratios were recorded.

    Raises:
        ValueError: If fewer than two patterns are given.
    """
    pattern_size = len(patterns)
    if pattern_size < 2:
        raise ValueError(
            f'at least 2 patterns are required for a comparison, got {pattern_size}'
        )

    # Every later pattern is compared against every earlier one, grouped by
    # champion: 1_0, 2_0, 3_0, 2_1, 3_1, 3_2, ... This reproduces the former
    # hard-coded tables for 2-4 patterns and extends to any count.
    combs = [
        Comb(f'{challenger}_{champion}', challenger, champion)
        for champion in range(pattern_size)
        for challenger in range(champion + 1, pattern_size)
    ]

    with pm.Model():
        # Prior on each pattern's success rate.
        rates = pm.Beta(
            'p',
            alpha=[pattern.alpha for pattern in patterns],
            beta=[pattern.beta for pattern in patterns],
            shape=pattern_size,
        )
        # Likelihood of the observed successes given the rates.
        pm.Binomial(
            'y',
            n=[pattern.trials for pattern in patterns],
            observed=[pattern.successes for pattern in patterns],
            p=rates,
            shape=pattern_size,
        )
        for comb in combs:
            pm.Deterministic(comb.key, rates[comb.challenger] / rates[comb.champion])

        return pm.sample(draws=sample_size), combs

def make_plot_image():
    """Render the current matplotlib figure to an image array and close it.

    The figure is saved as PNG into an in-memory buffer, decoded with PIL and
    converted to a numpy array.

    Returns:
        The decoded PNG as a numpy array.
    """
    try:
        with io.BytesIO() as buf:
            plt.savefig(buf, format='png')
            buf.seek(0)
            # np.array() forces PIL to decode the pixels while the buffer is
            # still open; Image.open() on its own only reads the header.
            with Image.open(buf) as img:
                return np.array(img)
    finally:
        # Close the figure even when saving or decoding fails, so failed
        # runs do not pile up open figures.
        plt.close()

def plot_probs(patterns, trace, threshold, sample_size):
    """Plot overlaid histograms of each pattern's sampled success rate.

    Args:
        patterns: Sequence of ``Pattern`` objects; their names label the series.
        trace: Sampling result exposing ``trace.posterior['p'].values``.
        threshold: Unused here.
        sample_size: Unused here.

    Returns:
        The rendered figure as returned by ``make_plot_image``.
    """
    names = [pattern.name for pattern in patterns]
    palette = sns.color_palette('muted')

    # Merge the first two axes (presumably chain and draw -- TODO confirm) so
    # that each row is one sample and each column one pattern.
    draws = trace.posterior['p'].values
    n_first, n_second, n_patterns = draws.shape[:3]
    wide = pd.DataFrame(
        draws.reshape((n_first * n_second, n_patterns)),
        columns=names,
    )
    # Long format: one row per (sample, pattern) pair.
    samples = wide.stack().to_frame().reset_index(level=1)
    samples.columns = ['項目名', '成功率']

    plt.figure(figsize=(10, 4))
    sns.histplot(
        data=samples, x='成功率', hue='項目名',
        bins=100, hue_order=names,
        stat='probability', element='step',
        palette=palette[:len(patterns)],
    )

    # Mark each pattern's mean and build a legend that reports it.
    handles = []
    for idx, name in enumerate(names):
        color = palette[idx]
        mean = samples.loc[samples['項目名'] == name, '成功率'].mean()
        plt.axvline(x=mean, color=color, linestyle='dashed')
        handles.append(
            Patch(facecolor=color, edgecolor=color, label=f'{name} 平均:{mean:.3f}')
        )
    plt.legend(title='項目名', handles=handles)

    plt.grid(False)

    return make_plot_image()

def plot_combs(patterns, trace, combs, threshold, sample_size):
    """Plot, for every compared pair, the distribution of the rate ratio.

    Each pair gets its own subplot showing the binned distribution of the
    sampled ratio, the shaded highest-density interval, the mean and a
    reference line at 1.0 (no difference).

    Args:
        patterns: Sequence of ``Pattern`` objects, indexed by the pairs.
        trace: Sampling result exposing ``trace.posterior[key].values``.
        combs: Sequence of ``Comb`` pairs to plot; must not be empty.
        threshold: Probability mass of the highest-density interval.
        sample_size: Unused here.

    Returns:
        The rendered figure as returned by ``make_plot_image``.

    Raises:
        ValueError: If ``combs`` is empty.
    """
    comb_size = len(combs)
    if comb_size == 0:
        raise ValueError('combs must contain at least one comparison to plot')

    colors = sns.color_palette('muted')

    # squeeze=False always returns a 2-D array of axes, so a single
    # comparison needs no special case.
    fig, axes = plt.subplots(comb_size, 1, figsize=(10, comb_size * 4), squeeze=False)

    for ax, c in zip(axes[:, 0], combs):
        name = f'"{patterns[c.champion].name}"より"{patterns[c.challenger].name}"が何倍良い？'
        ratios = pd.Series(trace.posterior[c.key].values.reshape(-1), name=name)
        mean = ratios.mean()

        # Bin the samples and normalise the counts to probabilities. The
        # counts are used directly rather than looked up by column name,
        # because pandas >= 2.0 names the value_counts() result 'count'.
        counts = ratios.value_counts(bins=100, sort=False)
        bin_left = counts.index.left.to_numpy()
        rates = counts.to_numpy() / counts.to_numpy().sum()

        ax.plot(bin_left, rates, color='gray', label='確率分布')
        ax.set_facecolor((1, 1, 1, 1))

        hdi = az.hdi(ratios.to_numpy(), hdi_prob=threshold)
        region = (hdi[0] < bin_left) & (bin_left < hdi[1])
        color = colors[c.challenger]

        ax.fill_between(
            bin_left[region], rates[region], 0, alpha=0.3,
            color=color, hatch='xx', label=f'HDI:{threshold * 100:.0f}%',
        )
        ax.axvline(
            x=mean, color=color,
            label=f'平均:{mean:.2f}倍', linestyle='dashed',
        )
        # A ratio of 1.0 means challenger and champion perform the same.
        ax.axvline(x=1.0, color='red')
        ax.text(hdi[0], 0, f'{hdi[0]:.3f}', ha='center', va='top', color=color, size='large')
        ax.text(hdi[1], 0, f'{hdi[1]:.3f}', ha='center', va='top', color=color, size='large')
        ax.grid(False)
        ax.legend()
        ax.set_title(name)

    return make_plot_image()

def beyesian_ab(*params):
    """Run the full analysis for one set of UI input values.

    Args:
        *params: Flat input values in the layout ``convert_params`` expects.

    Returns:
        ``(probs_image, combs_image, '')`` on success, or
        ``(None, None, traceback_text)`` if any step raised an exception.
    """
    try:
        # Parse the raw input values.
        patterns, threshold, sample_size = convert_params(params)
        # Fit the Bayesian model.
        trace, combs = make_trace(patterns, threshold, sample_size)
        images = (
            # Success rate of every pattern.
            plot_probs(patterns, trace, threshold, sample_size),
            # Pairwise differences between patterns.
            plot_combs(patterns, trace, combs, threshold, sample_size),
        )
    except Exception:
        return None, None, traceback.format_exc()
    return (*images, '')

def change_prior(prior_index):
    """Return the update for a prior parameter field when the prior changes.

    Args:
        prior_index: Index of the selected prior; 0 resets and locks the field.

    Returns:
        A ``gr.update`` that resets the field to 1 and makes it read-only for
        index 0, or makes it editable for any other index.
    """
    if prior_index != 0:
        return gr.update(interactive=True)
    return gr.update(value=1, interactive=False)

def add_pattern(name, default_value=10):
    """Add one row of input widgets describing a pattern.

    Args:
        name: Initial text of the name field.
        default_value: Initial value of the successes field.

    Returns:
        The ``(name, alpha, beta, trials, successes)`` components, in the
        order ``convert_params`` expects their values.
    """
    prior_choices = ['一様分布', 'ベータ分布']

    with gr.Row():
        name_box = gr.Textbox(label='項目名', value=name, interactive=True)
        prior = gr.Dropdown(
            prior_choices, label='事前分布の選択', type='index', value=prior_choices[0],
            interactive=True, scale=1,
        )
        prior_alpha = gr.Number(label='アルファ', value=1, minimum=0, interactive=False)
        prior_beta = gr.Number(label='ベータ', value=1, minimum=0, interactive=False)

        # Both prior parameter fields follow the dropdown selection.
        for prior_field in (prior_alpha, prior_beta):
            prior.change(change_prior, inputs=prior, outputs=prior_field)

        posterior_trials = gr.Number(label='試行数', value=1000, minimum=1, interactive=True)
        posterior_successes = gr.Number(
            label='成功数', value=default_value, minimum=0, interactive=True,
        )

    return name_box, prior_alpha, prior_beta, posterior_trials, posterior_successes

# Build the UI: pattern rows and settings on the left, results on the right.
with gr.Blocks() as app:
    with gr.Row():
        with gr.Column():
            params_a = add_pattern('A')
            params_b = add_pattern('B')
            # Rows with an empty name are ignored by the analysis.
            params_c = add_pattern('', default_value=15)
            params_d = add_pattern('', default_value=25)

            # Flat input list: five components per pattern row, then the
            # threshold and the sample size.
            params = [*params_a, *params_b, *params_c, *params_d]

            with gr.Row():
                threshold = gr.Number(
                    label='判断基準となる確率', value=0.9, minimum=0, maximum=1,
                    step=None, interactive=True,
                )
                # At least one draw is needed; zero draws leave nothing to plot.
                sample_size = gr.Number(label='サンプルサイズ', value=5000, minimum=1, interactive=True)
                run = gr.Button('実行')

                params.append(threshold)
                params.append(sample_size)

        with gr.Column():
            s_probs = gr.Image(label='成約率')
            s_combs = gr.Image(label='成約率の差')
            # Receives the third value returned by beyesian_ab: the traceback
            # text on failure, or an empty string on success.
            s_error = gr.Textbox(label='エラー', interactive=False)

    run.click(
        fn=beyesian_ab, inputs=params,
        outputs=[s_probs, s_combs, s_error], api_name='beyesian_ab',
    )

app.launch(height=1280)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

