In [20]:
from paths import DATA_DIR, CKPT_FOLDER, PROJECT_FOLDER

import matplotlib.pyplot as plt
import os
import numpy as np
import torch
import anndata
import scanpy as sc
import sklearn
import scvelo as scv

import anndata
import pandas as pd

from IPython.display import display
from torchdyn.core import NeuralODE

from scCFM.datamodules.time_sc_datamodule import TrajectoryDataModule
from scCFM.models.cfm.components.mlp import MLP
from scCFM.models.cfm.cfm_module import CFMLitModule

from scCFM.models.base.vae import VAE
from scCFM.models.base.geometric_vae import GeometricNBVAE
from scCFM.models.base.geodesic_ae import GeodesicAE

from scCFM.datamodules.sc_datamodule import scDataModule
from scCFM.models.cfm.components.eval.distribution_distances import compute_distribution_distances

from notebooks.utils import decode_trajectory_single_step, standardize, compute_prdc
from scvi.distributions import NegativeBinomial

Initialize the device

In [21]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [22]:
def cross_standardize(tensor1, tensor2):
    """
    Standardize tensor across the rows
    """
    mean_t1, std_t1 = tensor1.mean(0), tensor1.std(0)
    tensor1 = (tensor1 - mean_t1) / (std_t1 + 1e-6)
    tensor2 = (tensor2 - mean_t1) / (std_t1 + 1e-6)
    return tensor1, tensor2

def update_dict(ref, tgt):
    """
    Update a dictionary with the values of another 
    """
    for key in tgt:
        if key not in ref:
            ref[key] = []
        ref[key].append(tgt[key])
    return ref

Initialize datamodule to set vaes up

In [23]:
datamodule={'path': PROJECT_FOLDER / 'data/schiebinger_et_al/processed/schiebinger_et_al.h5ad', 
            'x_layer': 'X_norm', 
            'cond_keys': ['experimental_time', 'cell_sets'],
            'use_pca': False, 
            'n_dimensions': None, 
            'train_val_test_split': [1], 
            'batch_size': 64, 
            'num_workers': 2}

# Initialize datamodule
datamodule = scDataModule(**datamodule)

Initialize autoencoders

In [24]:
vae_kwargs={'in_dim': datamodule.in_dim,
       'n_epochs_anneal_kl': 1000, 
       'kl_weight': None, 
       'likelihood': 'nb', 
       'dropout': False, 
       'learning_rate': 0.001, 
       'dropout_p': False, 
       'model_library_size': True, 
       'batch_norm': True, 
       'kl_warmup_fraction': 0.1, 
       'hidden_dims': [256, 10]}
        
geometric_kwargs={'compute_metrics_every': 1, 
           'use_c': False, 
           'l2': True, 
           'eta_interp': 0, 
           'interpolate_z': False, 
           'start_jac_after': 0, 
           'fl_weight': 0.1,
           'detach_theta': True}

geodesic_kwargs={"in_dim": datamodule.in_dim,
          "hidden_dims": [256, 10],
          "batch_norm": True,
          "dropout": False, 
          "dropout_p": False,
          "likelihood": "nb",
          "learning_rate": 0.001}

# Initialize vae and geometric vae
vae = GeometricNBVAE(**geometric_kwargs, vae_kwargs=vae_kwargs).to(device)
geometric_vae = GeometricNBVAE(**geometric_kwargs, vae_kwargs=vae_kwargs).to(device)
geodesic_ae = GeodesicAE(**geodesic_kwargs).to(device)

# Load state dicts and put in eval mode 
vae.load_state_dict(torch.load(PROJECT_FOLDER / "checkpoints/ae/schiebinger_et_al/best_model_vae.ckpt")["state_dict"])
geometric_vae.load_state_dict(torch.load(PROJECT_FOLDER / "checkpoints/ae/schiebinger_et_al/best_model_geometric.ckpt")["state_dict"])
geodesic_ae.load_state_dict(torch.load(PROJECT_FOLDER / "checkpoints/ae/schiebinger_et_al/best_model_geodesic.ckpt")["state_dict"])

<All keys matched successfully>

## Setup CFMs

In [25]:
leavout_timepoints_folder = CKPT_FOLDER / "trajectory" / "schiebinger_et_al"

Initialize datamodule for trajectory

In [26]:
datamodule_kwargs_vae = {'path': PROJECT_FOLDER / 'data/schiebinger_et_al/flat/schiebinger_et_al_lib.h5ad',
                          'x_layer': 'X_latents',
                          'time_key': 'experimental_time', 
                          'use_pca': False, 
                          'n_dimensions': None, 
                          'train_val_test_split': [0.9, 0.1], 
                          'num_workers': 2, 
                          'batch_size': 512, 
                          'model_library_size': True}

datamodule_kwargs_flat = {'path': PROJECT_FOLDER / 'data/schiebinger_et_al/flat/schiebinger_et_al_flat_lib.h5ad',
                          'x_layer': 'X_latents',
                          'time_key': 'experimental_time', 
                          'use_pca': False, 
                          'n_dimensions': None, 
                          'train_val_test_split': [0.9, 0.1], 
                          'num_workers': 2, 
                          'batch_size': 512, 
                           'model_library_size': True}

datamodule_kwargs_geodesic= {'path': PROJECT_FOLDER / 'data/schiebinger_et_al/flat/schiebinger_et_al_geodesic.h5ad',
                          'x_layer': 'X_latents',
                          'time_key': 'experimental_time', 
                          'use_pca': False, 
                          'n_dimensions': None, 
                          'train_val_test_split': [0.9, 0.1], 
                          'num_workers': 2, 
                          'batch_size': 512, 
                           'model_library_size': True}

# Initialize the datamodules 
datamodule_vae = TrajectoryDataModule(**datamodule_kwargs_vae)
datamodule_flat = TrajectoryDataModule(**datamodule_kwargs_flat)
datamodule_geodesic = TrajectoryDataModule(**datamodule_kwargs_geodesic)

# Mapping real times to index
idx2time = datamodule_vae.idx2time

## Read data

First, read the latent space anndata and plot the results

In [27]:
# Read latent anndata
adata_latent_vae = sc.read_h5ad(DATA_DIR / "schiebinger_et_al" / "flat" / "schiebinger_et_al_lib.h5ad")
adata_latent_flat = sc.read_h5ad(DATA_DIR / "schiebinger_et_al" / "flat" / "schiebinger_et_al_flat_lib.h5ad")
adata_latent_geodesic = sc.read_h5ad(DATA_DIR / "schiebinger_et_al" / "flat" / "schiebinger_et_al_geodesic.h5ad")

# Read real anndata
adata_schiebinger_original = sc.read_h5ad(PROJECT_FOLDER / 'data/schiebinger_et_al/processed/schiebinger_et_al.h5ad')
sc.tl.pca(adata_schiebinger_original, n_comps=50)
adata_schiebinger_original.X = adata_schiebinger_original.layers["X_norm"].copy()

Number of experiments 

In [28]:
n_timepoints = len(np.unique(adata_latent_vae.obs.experimental_time))
idx2time = dict(zip(range(n_timepoints), np.unique(adata_latent_vae.obs.experimental_time)))
idx2time

{0: 0.0,
 1: 0.027777777777777776,
 2: 0.05555555555555555,
 3: 0.08333333333333333,
 4: 0.1111111111111111,
 5: 0.1388888888888889,
 6: 0.16666666666666666,
 7: 0.19444444444444445,
 8: 0.2222222222222222,
 9: 0.25,
 10: 0.2777777777777778,
 11: 0.3055555555555556,
 12: 0.3333333333333333,
 13: 0.3611111111111111,
 14: 0.3888888888888889,
 15: 0.4166666666666667,
 16: 0.4444444444444444,
 17: 0.4583333333333333,
 18: 0.4722222222222222,
 19: 0.4861111111111111,
 20: 0.5,
 21: 0.5277777777777778,
 22: 0.5555555555555556,
 23: 0.5833333333333334,
 24: 0.6111111111111112,
 25: 0.6388888888888888,
 26: 0.6666666666666666,
 27: 0.6944444444444444,
 28: 0.7222222222222222,
 29: 0.75,
 30: 0.7777777777777778,
 31: 0.8055555555555556,
 32: 0.8333333333333334,
 33: 0.8611111111111112,
 34: 0.8888888888888888,
 35: 0.9166666666666666,
 36: 0.9444444444444444,
 37: 0.9722222222222222,
 38: 1.0}

Initialize model

In [29]:
net_hparams = {"dim": adata_latent_flat.X.shape[1]+1,
                "w": 64,
                "time_varying": True}

cfm_kwargs = {'ot_sampler': 'exact', 
                   'sigma': 0.1, 
                   'use_real_time': False, 
                   'lr': 0.001, 
                   'antithetic_time_sampling': True}

## Evaluation

Load checkpoints

In [30]:
# LATENT SPACE METRICS
leaveout_ckpt_vae_latent = {}
leaveout_ckpt_flat_latent = {}
leaveout_ckpt_geodesic_latent = {}
leaveout_ckpt_previous_latent = {}

In [31]:
# DATA SPACE METRICS
leaveout_ckpt_vae_data = {}
leaveout_ckpt_flat_data = {}
leaveout_ckpt_geodesic_data = {}
leaveout_ckpt_previous_data = {}

In [33]:
for rep in range(1,4):
    for tp in [2, 5, 10, 15, 20, 25, 30]:
        print(f"Time point {tp}")
        #Pick time 0 observations
        X_adata_t0_latent_vae = torch.from_numpy(adata_latent_vae[adata_latent_vae.obs["experimental_time"]==idx2time[(tp-1)]].X).to(device)
        X_adata_t0_latent_flat = torch.from_numpy(adata_latent_flat[adata_latent_flat.obs["experimental_time"]==idx2time[(tp-1)]].X).to(device)
        X_adata_t0_latent_geodesic = torch.from_numpy(adata_latent_geodesic[adata_latent_geodesic.obs["experimental_time"]==idx2time[(tp-1)]].X).to(device)
    
        # Pick observations next timepoint 
        X_adata_t1_latent_vae = torch.from_numpy(adata_latent_vae[adata_latent_vae.obs["experimental_time"]==idx2time[tp]].X).to(device)
        X_adata_t1_latent_flat = torch.from_numpy(adata_latent_flat[adata_latent_flat.obs["experimental_time"]==idx2time[tp]].X).to(device)
        X_adata_t1_latent_geodesic = torch.from_numpy(adata_latent_geodesic[adata_latent_geodesic.obs["experimental_time"]==idx2time[tp]].X).to(device)    
    
        # Collect PCs    
        adata_real = adata_schiebinger_original[adata_schiebinger_original.obs["experimental_time"]==idx2time[tp]]
        X_adata_real_pca = torch.from_numpy(adata_real.obsm["X_pca"]).to(device)
        X_adata_real = torch.from_numpy(adata_real.layers["X_log"].A).to(device)
        
        #Pick library sizes
        l_t0_vae = adata_latent_vae.obs.loc[adata_latent_vae.obs["experimental_time"]==idx2time[(tp-1)], "log_library_size"].to_numpy()
        l_t0_flat = adata_latent_flat.obs.loc[adata_latent_flat.obs["experimental_time"]==idx2time[(tp-1)], "log_library_size"].to_numpy()
        l_t0_geodesic = adata_latent_geodesic.obs.loc[adata_latent_geodesic.obs["experimental_time"]==idx2time[(tp-1)], "log_library_size"].to_numpy()
    
        #Pick library sizes
        l_t0_vae = torch.from_numpy(l_t0_vae).to(device)
        l_t0_flat = torch.from_numpy(l_t0_flat).to(device)
        l_t0_geodesic = torch.from_numpy(l_t0_geodesic).to(device)
    
        # Initialize nets
        net_vae = MLP(**net_hparams).to(device)
        net_flat = MLP(**net_hparams).to(device)
        net_geodesic = MLP(**net_hparams).to(device)
        cfm_vae = CFMLitModule(net=net_vae, datamodule=datamodule_vae, **cfm_kwargs).to(device)
        cfm_flat = CFMLitModule(net=net_flat, datamodule=datamodule_flat, **cfm_kwargs).to(device)
        cfm_geodesic = CFMLitModule(net=net_geodesic, datamodule=datamodule_geodesic, **cfm_kwargs).to(device)
    
        # Read the checkpoints
        cfm_vae.load_state_dict(torch.load(leavout_timepoints_folder / f"schiebinger_vae_leaveout_{tp}_{rep}.ckpt")["state_dict"])
        cfm_flat.load_state_dict(torch.load(leavout_timepoints_folder / f"schiebinger_flat_leaveout_{tp}_{rep}.ckpt")["state_dict"])
        cfm_geodesic.load_state_dict(torch.load(leavout_timepoints_folder / f"schiebinger_geodesic_leaveout_{tp}_{rep}.ckpt")["state_dict"])
    
        mu_adata_predicted_vae, X_adata_predicted_vae, X_adata_latent_vae = decode_trajectory_single_step(X_adata_t0_latent_vae, 
                                                                                     l_t0_vae, 
                                                                                     tp-1, 
                                                                                     cfm_vae, 
                                                                                     vae)
                                                                                    
        mu_adata_predicted_flat, X_adata_predicted_flat, X_adata_latent_flat = decode_trajectory_single_step(X_adata_t0_latent_flat, 
                                                                                       l_t0_flat, 
                                                                                       tp-1, 
                                                                                       cfm_flat, 
                                                                                       geometric_vae)
                                                                                      
        mu_adata_predicted_geodesic, X_adata_predicted_geodesic, X_adata_latent_geodesic = decode_trajectory_single_step(X_adata_t0_latent_geodesic, 
                                                                                               l_t0_geodesic, 
                                                                                               tp-1, 
                                                                                               cfm_geodesic, 
                                                                                               geodesic_ae, 
                                                                                               model_type="geodesic_ae")
    
        ## PREDICT LATENT TRAJECTORIES 
        X_adata_t1_latent_vae, X_adata_latent_vae = cross_standardize(X_adata_t1_latent_vae, X_adata_latent_vae[:,:-1])
        X_adata_t1_latent_flat, X_adata_latent_flat = cross_standardize(X_adata_t1_latent_flat, X_adata_latent_flat[:,:-1])
        X_adata_t1_latent_geodesic, X_adata_latent_geodesic = cross_standardize(X_adata_t1_latent_geodesic, X_adata_latent_geodesic[:,:-1])
                                                                                       
        d_dist_vae_l = compute_distribution_distances(X_adata_t1_latent_vae.unsqueeze(1).to("cpu"), 
                                             X_adata_latent_vae.unsqueeze(1).to("cpu"))
        d_dist_flat_l = compute_distribution_distances(X_adata_t1_latent_flat.unsqueeze(1).to("cpu"),
                                             X_adata_latent_flat.unsqueeze(1).to("cpu"))
        d_dist_geod_l = compute_distribution_distances(X_adata_t1_latent_geodesic.unsqueeze(1).to("cpu"),
                                             X_adata_latent_geodesic.unsqueeze(1).to("cpu"))
        d_dist_prev = compute_distribution_distances(X_adata_t1_latent_vae.unsqueeze(1).to("cpu"),
                                             X_adata_t0_latent_vae.unsqueeze(1).to("cpu"))
        
        d_dist_vae_l = dict(zip(d_dist_vae_l[0], d_dist_vae_l[1]))
        d_dist_flat_l = dict(zip(d_dist_flat_l[0], d_dist_flat_l[1]))
        d_dist_geod_l = dict(zip(d_dist_geod_l[0], d_dist_geod_l[1]))
        d_dist_prev_l = dict(zip(d_dist_prev[0], d_dist_prev[1]))
        
        print("predict decoded trajectory")
        X_adata_predicted_vae = anndata.AnnData(X=X_adata_predicted_vae.numpy())
        X_adata_predicted_flat = anndata.AnnData(X=X_adata_predicted_flat.numpy())
        X_adata_predicted_geodesic = anndata.AnnData(X=X_adata_predicted_geodesic.numpy())
        X_adata_prev = adata_schiebinger_original[adata_schiebinger_original.obs["experimental_time"]==idx2time[tp-1]]
        
        sc.pp.log1p(X_adata_predicted_vae)
        sc.pp.log1p(X_adata_predicted_flat)
        sc.pp.log1p(X_adata_prev)
        sc.tl.pca(X_adata_predicted_vae, n_comps=50)
        sc.tl.pca(X_adata_predicted_flat, n_comps=50)
        sc.tl.pca(X_adata_predicted_geodesic, n_comps=50)
        sc.tl.pca(X_adata_prev, n_comps=50)
        
        d_dist_vae_d = compute_prdc(torch.from_numpy(X_adata_predicted_vae.obsm["X_pca"]), 
                                                 X_adata_real_pca.to("cpu"), nearest_k=10)
        d_dist_flat_d = compute_prdc(torch.from_numpy(X_adata_predicted_flat.obsm["X_pca"]), 
                                                 X_adata_real_pca.to("cpu"), nearest_k=10)
        d_dist_geodesic_d = compute_prdc(torch.from_numpy(X_adata_predicted_geodesic.obsm["X_pca"]), 
                                                 X_adata_real_pca.to("cpu"), nearest_k=10)
        d_dist_prev_d = compute_prdc(torch.from_numpy(X_adata_prev.obsm["X_pca"]), 
                                                 X_adata_real_pca.to("cpu"), nearest_k=10)
    
        # UPDATE DICTS 
        leaveout_ckpt_vae_latent = update_dict(leaveout_ckpt_vae_latent, d_dist_vae_l)
        leaveout_ckpt_flat_latent = update_dict(leaveout_ckpt_flat_latent, d_dist_flat_l)
        leaveout_ckpt_geodesic_latent = update_dict(leaveout_ckpt_geodesic_latent, d_dist_geod_l)
        leaveout_ckpt_previous_latent = update_dict(leaveout_ckpt_previous_latent, d_dist_prev_l)
        leaveout_ckpt_vae_data = update_dict(leaveout_ckpt_vae_data, d_dist_vae_d)
        leaveout_ckpt_flat_data = update_dict(leaveout_ckpt_flat_data, d_dist_flat_d)
        leaveout_ckpt_geodesic_data = update_dict(leaveout_ckpt_geodesic_data, d_dist_geodesic_d)
        leaveout_ckpt_previous_data = update_dict(leaveout_ckpt_previous_data, d_dist_prev_d)
        
        leaveout_ckpt_vae_latent = update_dict(leaveout_ckpt_vae_latent, {"rep": rep})
        leaveout_ckpt_flat_latent = update_dict(leaveout_ckpt_flat_latent, {"rep": rep})
        leaveout_ckpt_geodesic_latent = update_dict(leaveout_ckpt_geodesic_latent, {"rep": rep})
        leaveout_ckpt_previous_latent = update_dict(leaveout_ckpt_previous_latent, {"rep": rep})
        leaveout_ckpt_vae_data = update_dict(leaveout_ckpt_vae_data, {"rep": rep})
        leaveout_ckpt_flat_data = update_dict(leaveout_ckpt_flat_data, {"rep": rep})
        leaveout_ckpt_geodesic_data = update_dict(leaveout_ckpt_geodesic_data, {"rep": rep})
        leaveout_ckpt_previous_data = update_dict(leaveout_ckpt_previous_data, {"rep": rep})

Time point 2
predict decoded trajectory


  view_to_actual(adata)


Time point 5
predict decoded trajectory


  view_to_actual(adata)


Time point 10
predict decoded trajectory


  view_to_actual(adata)


Time point 15
predict decoded trajectory


  view_to_actual(adata)


Time point 20
predict decoded trajectory


  view_to_actual(adata)


Time point 25
predict decoded trajectory


  view_to_actual(adata)


Time point 5
predict decoded trajectory


  view_to_actual(adata)


Time point 10
predict decoded trajectory


  view_to_actual(adata)


Time point 15
predict decoded trajectory


  view_to_actual(adata)


Time point 20
predict decoded trajectory


  view_to_actual(adata)


Time point 25
predict decoded trajectory


  view_to_actual(adata)


Time point 30
predict decoded trajectory


  view_to_actual(adata)


Time point 2
predict decoded trajectory


  view_to_actual(adata)


Time point 5
predict decoded trajectory


  view_to_actual(adata)


Time point 10
predict decoded trajectory


  view_to_actual(adata)


Time point 15
predict decoded trajectory


  view_to_actual(adata)


Time point 20
predict decoded trajectory


  view_to_actual(adata)


Time point 25
predict decoded trajectory


  view_to_actual(adata)


Time point 30
predict decoded trajectory


  view_to_actual(adata)


**Latent**

Geodesic

In [60]:
pd.DataFrame(leaveout_ckpt_geodesic_latent).mean(0)

1-Wasserstein    2.329433
2-Wasserstein    2.492044
Linear_MMD       0.385581
Poly_MMD         0.535126
RBF_MMD          0.454486
Mean_MSE         0.426298
Mean_L2          0.570427
Mean_L1          0.483610
rep              2.000000
dtype: float64

In [61]:
pd.DataFrame(leaveout_ckpt_geodesic_latent).std(0)/np.sqrt(21)

1-Wasserstein    0.207948
2-Wasserstein    0.217283
Linear_MMD       0.087805
Poly_MMD         0.070435
RBF_MMD          0.059337
Mean_MSE         0.088869
Mean_L2          0.071032
Mean_L1          0.059893
rep              0.182574
dtype: float64

VAE

In [62]:
pd.DataFrame(leaveout_ckpt_vae_latent).mean(0)

1-Wasserstein    1.999991
2-Wasserstein    2.078605
Linear_MMD       0.191206
Poly_MMD         0.381490
RBF_MMD          0.292974
Mean_MSE         0.211045
Mean_L2          0.402064
Mean_L1          0.314145
rep              2.000000
dtype: float64

In [63]:
pd.DataFrame(leaveout_ckpt_vae_latent).std(0)/np.sqrt(21)

1-Wasserstein    0.118438
2-Wasserstein    0.119343
Linear_MMD       0.041419
Poly_MMD         0.047787
RBF_MMD          0.033407
Mean_MSE         0.045530
Mean_L2          0.049694
Mean_L1          0.034405
rep              0.182574
dtype: float64

Flat

In [45]:
pd.DataFrame(leaveout_ckpt_flat_latent).mean(0)

1-Wasserstein    1.545736
2-Wasserstein    1.638642
Linear_MMD       0.163053
Poly_MMD         0.346271
RBF_MMD          0.289706
Mean_MSE         0.170591
Mean_L2          0.360636
Mean_L1          0.299171
rep              2.000000
dtype: float64

In [46]:
pd.DataFrame(leaveout_ckpt_flat_latent).std(0)/np.sqrt(21)

1-Wasserstein    0.126826
2-Wasserstein    0.133294
Linear_MMD       0.037846
Poly_MMD         0.046448
RBF_MMD          0.039556
Mean_MSE         0.037979
Mean_L2          0.045018
Mean_L1          0.038694
rep              0.182574
dtype: float64

Baseline

In [47]:
pd.DataFrame(leaveout_ckpt_previous_latent).mean(0)

1-Wasserstein    3.138594
2-Wasserstein    3.213354
Linear_MMD       0.752031
Poly_MMD         0.828778
RBF_MMD          0.699239
Mean_MSE         0.767308
Mean_L2          0.834317
Mean_L1          0.698441
rep              2.000000
dtype: float64

In [48]:
pd.DataFrame(leaveout_ckpt_previous_latent).std(0)/np.sqrt(21)

1-Wasserstein    0.166742
2-Wasserstein    0.163996
Linear_MMD       0.111211
Poly_MMD         0.057078
RBF_MMD          0.050749
Mean_MSE         0.117782
Mean_L2          0.059676
Mean_L1          0.053863
rep              0.182574
dtype: float64

**Flat**

Geodesic

In [50]:
pd.DataFrame(leaveout_ckpt_geodesic_data).mean(0)

precision    0.036196
recall       0.230541
density      0.003977
coverage     0.001679
rep          2.000000
dtype: float64

In [51]:
pd.DataFrame(leaveout_ckpt_geodesic_data).std(0)/np.sqrt(21)

precision    0.013831
recall       0.050641
density      0.001536
coverage     0.000427
rep          0.182574
dtype: float64

VAE

In [52]:
pd.DataFrame(leaveout_ckpt_vae_data).mean(0)

precision    0.366719
recall       0.053897
density      0.131509
coverage     0.098468
rep          2.000000
dtype: float64

In [53]:
pd.DataFrame(leaveout_ckpt_vae_data).std(0)/np.sqrt(21)

precision    0.042168
recall       0.008185
density      0.019767
coverage     0.005881
rep          0.182574
dtype: float64

Flat

In [55]:
pd.DataFrame(leaveout_ckpt_flat_data).mean(0)

precision    0.382833
recall       0.074628
density      0.160654
coverage     0.127050
rep          2.000000
dtype: float64

In [56]:
pd.DataFrame(leaveout_ckpt_flat_data).std(0)/np.sqrt(21)

precision    0.043896
recall       0.012476
density      0.037192
coverage     0.012890
rep          0.182574
dtype: float64

Baseline

In [58]:
pd.DataFrame(leaveout_ckpt_previous_data).mean(0)

precision    0.270361
recall       0.072238
density      0.123421
coverage     0.071784
rep          2.000000
dtype: float64

In [59]:
pd.DataFrame(leaveout_ckpt_previous_data).std(0)/np.sqrt(21)

precision    0.050155
recall       0.014285
density      0.040479
coverage     0.013738
rep          0.182574
dtype: float64