In [1]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Column labels, in order, applied to the records read from each result
# pickle when they are turned into a DataFrame.
cols = [
    'tag', 'seed',
    'n_fits', 'n_sims',
    'mdps', 'n_cols', 'n_cc', 'mis_by_col',
    'smae', 'elapsed', 'bias',
]

In [4]:
# Directory containing one pickled result file per experiment run.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
data_dir = '/Users/nshah/work/vcimpute/data/experiment4_copreg_only'
file_lst = os.listdir(data_dir)

# Build `df` from scratch: gather one frame per file and concatenate once.
# The previous version concatenated onto a pre-existing `df`, which fails with
# NameError on a fresh kernel and appends duplicate rows when the cell is re-run.
frames = []
for fname in file_lst:
    # NOTE(review): unpickling can execute arbitrary code — only load trusted files.
    _df = pd.DataFrame(pd.read_pickle(os.path.join(data_dir, fname)), columns=cols)
    # `d` is parsed from the file name: third '_'-separated token, with
    # everything from the first '.' onwards stripped, converted to int.
    _df['d'] = int(fname.split('_')[2].split('.')[0])
    frames.append(_df)

# Per-file indices are kept as-is here (no ignore_index), matching the
# original concatenation behaviour.
df = pd.concat(frames)

In [11]:
# Order rows by d, then seed, then tag ...
df = df.sort_values(by=['d', 'seed', 'tag'])
# ... and replace the index with a fresh 0..n-1 range, discarding the old one.
df = df.reset_index(drop=True)

In [13]:
# Preserve the original per-row `smae` entries in `smae_raw` exactly once.
# Without the guard, re-running this cell would overwrite `smae_raw` with the
# already-averaged scalars and the raw values would be lost.
if 'smae_raw' not in df.columns:
    df['smae_raw'] = df['smae']
# Collapse each raw entry to a single number with a NaN-ignoring mean.
df['smae'] = df['smae_raw'].apply(np.nanmean)

In [18]:
# mask_frac = 0.5 * n_cols / d, evaluated element-wise per row.
# NOTE(review): the meaning of the 0.5 factor is not visible here — confirm
# against the experiment setup.
df['mask_frac'] = df['n_cols'].mul(0.5).div(df['d'])

In [22]:
# Keep the unconverted timings in `elapsed_raw` (same pattern as `smae_raw`)
# and always convert from that copy, so re-running this cell does not divide
# already-converted values a second time.
if 'elapsed_raw' not in df.columns:
    df['elapsed_raw'] = df['elapsed']
# Divide by 1e9, then by 60 (presumably nanoseconds -> minutes; confirm the
# unit recorded by the experiment).
df['elapsed'] = df['elapsed_raw']/1e9/60

In [29]:
# Mean of each metric per (d, tag) group. Using the built-in groupby mean
# avoids passing `np.mean` to `agg`, which recent pandas versions deprecate.
# Column order matches the original aggregation: smae, bias, elapsed, mask_frac.
summary_by_d = (
    df.groupby(['d', 'tag'])[['smae', 'bias', 'elapsed', 'mask_frac']]
    .mean()
    .unstack(-1)  # move `tag` into the columns: one sub-column per tag under each metric
)
# print() is intentional: the raw LaTeX source is wanted, not a rich repr.
print(summary_by_d.to_latex(float_format='{:,.2f}'.format))

\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{2}{l}{smae} & \multicolumn{2}{l}{bias} & \multicolumn{2}{l}{elapsed} & \multicolumn{2}{l}{mask\_frac} \\
tag & copreg & gcimpute & copreg & gcimpute &  copreg & gcimpute &    copreg & gcimpute \\
d   &        &          &        &          &         &          &           &          \\
\midrule
50  &   0.82 &     0.59 &   0.11 &     0.14 &   36.41 &     0.02 &      0.20 &     0.20 \\
60  &   0.76 &     0.55 &   0.11 &     0.12 &   54.43 &     0.03 &      0.17 &     0.17 \\
70  &   0.73 &     0.53 &   0.11 &     0.11 &   76.08 &     0.03 &      0.14 &     0.14 \\
80  &   0.65 &     0.47 &   0.10 &     0.09 &  101.18 &     0.04 &      0.12 &     0.12 \\
90  &   0.64 &     0.46 &   0.10 &     0.08 &  129.90 &     0.05 &      0.11 &     0.11 \\
100 &   0.62 &     0.44 &   0.10 &     0.08 &  162.05 &     0.06 &      0.10 &     0.10 \\
\bottomrule
\end{tabular}



In [20]:
# Show the assembled frame for inspection; as the cell's last expression it is
# rendered as the cell output (pandas elides the middle rows of long frames).
df

Unnamed: 0,tag,seed,n_fits,n_sims,mdps,n_cols,n_cc,mis_by_col,smae,elapsed,bias,d,smae_raw,mask_frac
0,copreg,0,20,20,7,20,466,"[534, 534, 534, 0, 0, 0, 534, 0, 0, 420, 352, ...",0.787956,2163796178000,0.109834,50,"[0.879601223569074, 0.6707690187120207, 0.7515...",0.2
1,gcimpute,0,1,1,7,20,466,"[534, 534, 534, 0, 0, 0, 534, 0, 0, 420, 352, ...",0.569871,1372789000,0.131607,50,"[0.6772588924236599, 0.5135489004550393, 0.526...",0.2
2,copreg,1,20,20,10,20,444,"[0, 556, 0, 0, 555, 444, 0, 0, 0, 132, 0, 407,...",0.839337,2151206978000,0.118972,50,"[nan, 0.8371935680664032, nan, nan, 0.82208691...",0.2
3,gcimpute,1,1,1,10,20,444,"[0, 556, 0, 0, 555, 444, 0, 0, 0, 132, 0, 407,...",0.588445,1527254000,0.144104,50,"[nan, 0.5991061568190504, nan, nan, 0.58830363...",0.2
4,copreg,2,20,20,10,20,454,"[0, 0, 546, 546, 0, 0, 0, 0, 440, 426, 493, 0,...",0.820453,2142743027000,0.109439,50,"[nan, nan, 0.8835425380237296, 0.8223197306338...",0.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,gcimpute,17,1,1,6,20,469,"[0, 515, 0, 531, 531, 0, 0, 0, 531, 0, 0, 0, 0...",0.454334,3096706000,0.081395,100,"[nan, 0.4569085960022882, nan, 0.5291876657221...",0.1
236,copreg,18,20,20,9,20,486,"[0, 0, 0, 0, 0, 0, 0, 507, 0, 0, 0, 0, 509, 0,...",0.629637,9597765981000,0.097744,100,"[nan, nan, nan, nan, nan, nan, nan, 0.57550953...",0.1
237,gcimpute,18,1,1,9,20,486,"[0, 0, 0, 0, 0, 0, 0, 507, 0, 0, 0, 0, 509, 0,...",0.452855,3032000000,0.079308,100,"[nan, nan, nan, nan, nan, nan, nan, 0.40920758...",0.1
238,copreg,19,20,20,6,20,495,"[0, 0, 459, 0, 0, 505, 0, 0, 0, 0, 0, 0, 0, 0,...",0.605143,9689318615000,0.096277,100,"[nan, nan, 0.6341865982065816, nan, nan, 0.773...",0.1
