In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy import stats

from analysis_toolkit import setup_latex, util, plots
from analysis_toolkit import definitions as defs

# Load summary.csv into a DataFrame, using the column dtypes declared in defs.COLUMNS.
summary = pd.read_csv('summary.csv', dtype=defs.COLUMNS)
# NOTE(review): setup_latex is a project helper from analysis_toolkit; presumably it
# configures LaTeX rendering for later plots — confirm against its definition.
setup_latex()

In [2]:
# LaTeX figure snippet; doubled braces are literal braces after str.format,
# single-braced names are the fields filled in below.
FIG_TEXT = r"""
\begin{{figure}}[H]
    \centering
    \includegraphics[width=0.8\textwidth]{{plots/p1_{plot}_{metric}/d={d},p={p},k={k}.pdf}}
    \caption{{{metric_desc} | {d} Dimensions, {k} Clusters ({p} Points) | {plot_desc}}}
    \label{{fig:{metric}_d{d}_k{k}_{plot}}}
\end{{figure}}"""

# (dimensions, clusters) combinations to emit figures for.
DIM_CLUSTER_PAIRS = [(3, 3), (3, 5), (3, 8), (5, 3), (8, 3)]
# (metric key used in paths/labels, caption text) pairs.
METRIC_CHOICES = [
    ('rand', 'Oracle Normalized Rand Score'),
    ('nmi', 'Oracle Normalized NMI'),
]
# (plot key used in paths/labels, caption text) pairs.
PLOT_CHOICES = [
    ('boxplot', 'Boxplot of 100 Runs'),
    ('se', '2SE Intervals; arrow denotes intervals less than 0.8'),
]

# Print one figure snippet per (d, k) x metric x plot combination, in that nesting order.
for d, k in DIM_CLUSTER_PAIRS:
    # Point count shown in the file name and caption is d * k * 80.
    p = d * k * 80
    for metric, metric_desc in METRIC_CHOICES:
        for plot, desc in PLOT_CHOICES:
            fields = dict(
                d=d, k=k, p=p,
                plot=plot, plot_desc=desc,
                metric=metric, metric_desc=metric_desc,
            )
            print(FIG_TEXT.format(**fields))



\begin{figure}[H]
    \centering
    \includegraphics[width=0.8\textwidth]{plots/p1_boxplot_rand/d=3,p=720,k=3.pdf}
    \caption{Oracle Normalized Rand Score | 3 Dimensions, 3 Clusters (720 Points) | Boxplot of 100 Runs}
    \label{fig:rand_d3_k3_boxplot}
\end{figure}

\begin{figure}[H]
    \centering
    \includegraphics[width=0.8\textwidth]{plots/p1_se_rand/d=3,p=720,k=3.pdf}
    \caption{Oracle Normalized Rand Score | 3 Dimensions, 3 Clusters (720 Points) | 2SE Intervals; arrow denotes intervals less than 0.8}
    \label{fig:rand_d3_k3_se}
\end{figure}

\begin{figure}[H]
    \centering
    \includegraphics[width=0.8\textwidth]{plots/p1_boxplot_nmi/d=3,p=720,k=3.pdf}
    \caption{Oracle Normalized NMI | 3 Dimensions, 3 Clusters (720 Points) | Boxplot of 100 Runs}
    \label{fig:nmi_d3_k3_boxplot}
\end{figure}

\begin{figure}[H]
    \centering
    \includegraphics[width=0.8\textwidth]{plots/p1_se_nmi/d=3,p=720,k=3.pdf}
    \caption{Oracle Normalized NMI | 3 Dimensions, 3 Clusters (72

In [3]:
def stats_table(key):
    """Build the rows of a LaTeX table of ``mean \\pm 2*SEM`` per method and test.

    The first rows are ``\\hline``, a header listing ``(d, k)`` for every entry
    of ``defs.TESTS_P1``, and another ``\\hline``.  Then one row is emitted per
    ``(method, description)`` pair in ``defs.METHODS_P1``; each cell is the
    mean of the values returned by ``util.oracle_ratio`` for that method and
    test, plus/minus twice their standard error (``scipy.stats.sem``), both
    formatted to three decimals.

    Args:
        key: Forwarded as ``key=`` to ``util.oracle_ratio``; the notebook
            calls this with ``'rand'`` and ``'nmi'``.

    Returns:
        str: The table rows joined with newlines (no ``tabular`` environment
        wrapper is included).
    """
    # Raw strings keep the LaTeX backslashes literal; the non-raw forms
    # ('\hline', '\pm') are invalid escape sequences that newer Python
    # versions warn about.
    hline = r'\hline'

    header_cells = ['$({}, {})$'.format(t['d'], t['k']) for t in defs.TESTS_P1]
    res = [
        hline,
        r'$(d, k)$ & {} \\'.format(' & '.join(header_cells)),
        hline,
    ]

    for method, desc in defs.METHODS_P1.items():
        # One collection of values per test configuration; each test dict is
        # expanded into keyword arguments of util.oracle_ratio.
        data = [util.oracle_ratio(summary, key=key, method=method, **t) for t in defs.TESTS_P1]

        body = ' & '.join(
            r"${:.3f} \pm {:.3f}$".format(np.mean(ratios), 2 * stats.sem(ratios))
            for ratios in data)
        res.append(r'{} & {} \\'.format(desc, body))

    res.append(hline)
    # Extra rule at list index 10 = 3 header entries + 7 method rows.  In the
    # recorded output this separates the MFM rows from the DPM rows.  If there
    # are fewer than 10 entries, list.insert simply appends at the end.
    res.insert(10, hline)
    return '\n'.join(res)

In [4]:
# Print the LaTeX table rows built by stats_table for the 'rand' key.
print(stats_table('rand'))

\hline
$(d, k)$ & $(3, 3)$ & $(3, 5)$ & $(3, 8)$ & $(5, 3)$ & $(8, 3)$ \\
\hline
MFM: poisson$(k + 2)$ & $0.949 \pm 0.015$ & $0.936 \pm 0.013$ & $0.938 \pm 0.009$ & $0.983 \pm 0.004$ & $0.994 \pm 0.002$ \\
MFM: (5,1) hybrid & $0.958 \pm 0.011$ & $0.939 \pm 0.011$ & $0.938 \pm 0.009$ & $0.981 \pm 0.007$ & $0.994 \pm 0.002$ \\
MFM: poisson$(k - 2)$ & $0.964 \pm 0.010$ & $0.943 \pm 0.010$ & $0.940 \pm 0.007$ & $0.982 \pm 0.006$ & $0.994 \pm 0.002$ \\
MFM: gibbs & $0.925 \pm 0.032$ & $0.881 \pm 0.035$ & $0.885 \pm 0.033$ & $0.808 \pm 0.056$ & $0.018 \pm 0.020$ \\
MFM: split merge & $0.718 \pm 0.030$ & $0.670 \pm 0.028$ & $0.663 \pm 0.028$ & $0.742 \pm 0.034$ & $0.873 \pm 0.015$ \\
MFM: (3,1) hybrid & $0.955 \pm 0.012$ & $0.939 \pm 0.011$ & $0.939 \pm 0.008$ & $0.978 \pm 0.010$ & $0.995 \pm 0.002$ \\
MFM: (10,1) hybrid & $0.961 \pm 0.010$ & $0.941 \pm 0.010$ & $0.939 \pm 0.009$ & $0.983 \pm 0.005$ & $0.994 \pm 0.002$ \\
\hline
DPM: $\alpha=0.1$ & $0.963 \pm 0.009$ & $0.943 \pm 0.010$ & $0.9

In [5]:
# Print the LaTeX table rows built by stats_table for the 'nmi' key.
print(stats_table('nmi'))

\hline
$(d, k)$ & $(3, 3)$ & $(3, 5)$ & $(3, 8)$ & $(5, 3)$ & $(8, 3)$ \\
\hline
MFM: poisson$(k + 2)$ & $0.940 \pm 0.015$ & $0.935 \pm 0.011$ & $0.939 \pm 0.007$ & $0.979 \pm 0.005$ & $0.991 \pm 0.002$ \\
MFM: (5,1) hybrid & $0.949 \pm 0.012$ & $0.938 \pm 0.010$ & $0.937 \pm 0.008$ & $0.978 \pm 0.005$ & $0.991 \pm 0.002$ \\
MFM: poisson$(k - 2)$ & $0.958 \pm 0.011$ & $0.943 \pm 0.010$ & $0.940 \pm 0.006$ & $0.980 \pm 0.005$ & $0.992 \pm 0.002$ \\
MFM: gibbs & $0.922 \pm 0.031$ & $0.906 \pm 0.024$ & $0.909 \pm 0.023$ & $0.849 \pm 0.048$ & $0.023 \pm 0.025$ \\
MFM: split merge & $0.710 \pm 0.023$ & $0.717 \pm 0.017$ & $0.725 \pm 0.015$ & $0.720 \pm 0.027$ & $0.840 \pm 0.014$ \\
MFM: (3,1) hybrid & $0.947 \pm 0.013$ & $0.937 \pm 0.010$ & $0.939 \pm 0.007$ & $0.977 \pm 0.006$ & $0.992 \pm 0.002$ \\
MFM: (10,1) hybrid & $0.953 \pm 0.012$ & $0.940 \pm 0.010$ & $0.940 \pm 0.008$ & $0.978 \pm 0.005$ & $0.991 \pm 0.003$ \\
\hline
DPM: $\alpha=0.1$ & $0.956 \pm 0.010$ & $0.945 \pm 0.009$ & $0.9