In [1]:
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
# Directory holding the experiment result pickles. Set the VCIMPUTE_DATA_DIR
# environment variable to run this notebook on another machine; the original
# location is kept as the fallback so existing runs behave the same.
data_dir = os.environ.get('VCIMPUTE_DATA_DIR', '/Users/nshah/work/vcimpute/data')

In [3]:
# Load the ten experiment3_monotone_<i>.pkl result files from data_dir and
# stack them into a single long frame, one row per record.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only point data_dir at result files you produced yourself.
RESULT_COLUMNS = [
    'tag', 'seed', 'n_fits', 'n_sims', 'n_mdps', 'n_cols_mis',
    'n_cc', 'miss_by_col', 'raw_smae', 'elapsed', 'bias',
]

run_frames = []
for i in range(10):
    # The unpickled object is iterated as a sequence of sequences of records;
    # flatten it so each record becomes one row.
    nested_records = pd.read_pickle(f'{data_dir}/experiment3_monotone_{i}.pkl')
    run_frames.append(
        pd.DataFrame(
            [record for batch in nested_records for record in batch],
            columns=RESULT_COLUMNS,
        )
    )

# Concatenate once instead of growing the frame inside the loop (which copies
# all accumulated rows on every iteration). The list is reversed so the rows
# keep the ordering the incremental version produced (last file first), and
# the per-file row indices are retained unchanged.
df = pd.concat(run_frames[::-1])

In [4]:
def _weighted_smae(row):
    """Average of raw_smae weighted by miss_by_col, for one row.

    np.nan_to_num replaces NaN entries of raw_smae with 0 before weighting,
    so they add nothing to the numerator; the denominator is the plain sum
    of miss_by_col.
    """
    weights = row['miss_by_col']
    errors = np.nan_to_num(row['raw_smae'])
    return np.dot(weights, errors) / np.sum(weights)


# Unweighted per-row mean of raw_smae, ignoring NaN entries.
df['smae'] = df['raw_smae'].apply(np.nanmean)
# Per-row mean of raw_smae weighted by miss_by_col.
df['smae2'] = df[['miss_by_col', 'raw_smae']].apply(_weighted_smae, axis=1)
# elapsed scaled down by 1e9 (presumably nanoseconds -> seconds; confirm
# against the code that recorded it).
df['elapsed_s'] = df['elapsed'] / 1_000_000_000

In [5]:
# Notebook-wide pandas display settings: show up to 800 rows and render
# floats with thousands separators and two decimals.
_display_options = {
    'display.max_rows': 800,
    'display.float_format': '{:,.2f}'.format,
}
for _option_name, _option_value in _display_options.items():
    pd.set_option(_option_name, _option_value)

In [6]:
# Mean bias for every (n_cols_mis, tag) pair, pivoted so n_cols_mis runs
# across the columns and tag down the rows, emitted as a LaTeX tabular.
bias_means = df.groupby(['n_cols_mis', 'tag'])[['bias']].mean()
bias_by_tag = bias_means.unstack('n_cols_mis')
print(bias_by_tag.to_latex(float_format='{:,.2f}'.format))

\begin{tabular}{lrrrrrrrrr}
\toprule
{} & \multicolumn{9}{l}{bias} \\
n\_cols\_mis &    1 &    2 &    3 &    4 &    5 &    6 &    7 &    8 &    9 \\
tag      &      &      &      &      &      &      &      &      &      \\
\midrule
copfit   & 0.01 & 0.03 & 0.04 & 0.05 & 0.06 & 0.06 & 0.06 & 0.06 & 0.06 \\
copreg   & 0.00 & 0.01 & 0.01 & 0.02 & 0.02 & 0.03 & 0.03 & 0.03 & 0.03 \\
gcimpute & 0.00 & 0.01 & 0.01 & 0.02 & 0.02 & 0.03 & 0.03 & 0.04 & 0.04 \\
mdpfit   & 0.00 & 0.01 & 0.01 & 0.02 & 0.02 & 0.02 & 0.03 & 0.03 & 0.04 \\
\bottomrule
\end{tabular}



In [7]:
# Mean smae and bias for every (n_cols_mis, tag) pair, pivoted so tag runs
# across the columns and n_cols_mis down the rows, emitted as a LaTeX tabular.
smae_bias_means = df.groupby(['n_cols_mis', 'tag'])[['smae', 'bias']].mean()
smae_bias_by_n_cols = smae_bias_means.unstack('tag')
print(smae_bias_by_n_cols.to_latex(float_format='{:,.2f}'.format))

\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{4}{l}{smae} & \multicolumn{4}{l}{bias} \\
tag & copfit & copreg & gcimpute & mdpfit & copfit & copreg & gcimpute & mdpfit \\
n\_cols\_mis &        &        &          &        &        &        &          &        \\
\midrule
1          &   0.55 &   0.34 &     0.26 &   0.35 &   0.01 &   0.00 &     0.00 &   0.00 \\
2          &   0.69 &   0.49 &     0.34 &   0.46 &   0.03 &   0.01 &     0.01 &   0.01 \\
3          &   0.77 &   0.60 &     0.40 &   0.57 &   0.04 &   0.01 &     0.01 &   0.01 \\
4          &   0.85 &   0.72 &     0.47 &   0.68 &   0.05 &   0.02 &     0.02 &   0.02 \\
5          &   0.91 &   0.84 &     0.55 &   0.80 &   0.06 &   0.02 &     0.02 &   0.02 \\
6          &   0.97 &   0.95 &     0.63 &   0.91 &   0.06 &   0.03 &     0.03 &   0.02 \\
7          &   1.03 &   1.06 &     0.71 &   1.03 &   0.06 &   0.03 &     0.03 &   0.03 \\
8          &   1.08 &   1.17 &     0.79 &   1.16 &   0.06 &   0.03 &     0.04 &   0.03 \\


In [9]:
# Mean elapsed_s for every (n_cols_mis, tag) pair, pivoted so n_cols_mis runs
# across the columns and tag down the rows, emitted as a LaTeX tabular.
elapsed_means_for_latex = df.groupby(['n_cols_mis', 'tag'])[['elapsed_s']].mean()
elapsed_latex_table = elapsed_means_for_latex.unstack('n_cols_mis')
print(elapsed_latex_table.to_latex(float_format='{:,.2f}'.format))

\begin{tabular}{lrrrrrrrrr}
\toprule
{} & \multicolumn{9}{l}{elapsed\_s} \\
n\_cols\_mis &         1 &     2 &     3 &     4 &     5 &     6 &     7 &     8 &     9 \\
tag      &           &       &       &       &       &       &       &       &       \\
\midrule
copfit   &      5.17 &  6.03 &  6.71 &  7.19 &  7.56 &  7.79 &  7.94 &  8.02 &  8.05 \\
copreg   &      4.97 &  9.65 & 14.10 & 18.32 & 22.40 & 26.35 & 30.24 & 34.01 & 37.71 \\
gcimpute &      0.34 &  0.34 &  0.34 &  0.34 &  0.34 &  0.34 &  0.35 &  0.35 &  0.34 \\
mdpfit   &     15.05 & 29.20 & 44.63 & 61.60 & 77.82 & 92.06 & 98.14 & 96.94 & 90.62 \\
\bottomrule
\end{tabular}



In [8]:
# Mean elapsed_s for every (n_cols_mis, tag) pair, with n_cols_mis across the
# columns and tag down the rows; the last expression is the cell's rich display.
elapsed_means = df.groupby(['n_cols_mis', 'tag'])[['elapsed_s']].mean()
elapsed_means.unstack('n_cols_mis')

Unnamed: 0_level_0,elapsed_s,elapsed_s,elapsed_s,elapsed_s,elapsed_s,elapsed_s,elapsed_s,elapsed_s,elapsed_s
n_cols_mis,1,2,3,4,5,6,7,8,9
tag,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
copfit,5.17,6.03,6.71,7.19,7.56,7.79,7.94,8.02,8.05
copreg,4.97,9.65,14.1,18.32,22.4,26.35,30.24,34.01,37.71
gcimpute,0.34,0.34,0.34,0.34,0.34,0.34,0.35,0.35,0.34
mdpfit,15.05,29.2,44.63,61.6,77.82,92.06,98.14,96.94,90.62
