### README
A notebook to compute evaluation scores for the raw data as well as for data batch-corrected using baseline methods: regressing the batch effect out, ComBat, and mnnCorrect. The workflow is run for both the simulated and the real-world data.

In [2]:
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import os 
import glob
import sys
from FlowCytometryTools import FCMeasurement
from collections import Counter
import pdb
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import xlrd
from collections import Counter
from sklearn import preprocessing
import scipy as sp
import anndata
import scanpy as sc

sys.path.append(os.path.dirname(os.getcwd()))
from visualisation_and_evaluation.helpers_eval import cal_UMAP, entropy, cal_entropy, evaluate_scores, separate_metadata
from baselines.baselines_helpers import scale, convert_to_ann, sample_cells, batch_correct, prep_anndata_for_eval, eval_batch_sample 

In [3]:
def wrapper_raw(adata_full, samples_selected, save_path, suffix='full'):
    """Score the uncorrected data and write the scores to CSV.

    ``adata_full`` is split into one copied subset per entry of
    ``samples_selected`` (rows whose ``obs['sample']`` equals that entry), the
    resulting dict is scored with ``eval_batch_sample``, the rows are tagged
    with ``method='raw'`` and the table is written to
    ``save_path + 'scores_raw_' + suffix + '.csv'``.

    Returns the scored table.
    """
    # one independent copy per selected sample, keyed by the sample name
    per_sample = {
        name: adata_full[adata_full.obs['sample']==name].copy()
        for name in samples_selected
    }
    scores = eval_batch_sample(per_sample)
    scores['method'] = 'raw'
    # save_path is used as a plain string prefix, so it has to end with a separator
    out_file = save_path+'scores_raw_'+suffix+'.csv'
    scores.to_csv(out_file)
    return scores

def wrapper_reg(adata_full, samples_selected, save_path, suffix='full'):
    """Score the data after regressing out the batch effect and write the scores to CSV.

    ``batch_correct(adata_full, method='reg')`` is run on the whole object and
    its result is handed straight to ``eval_batch_sample``. The rows are tagged
    with ``method='reg'`` and written to
    ``save_path + 'scores_reg_' + suffix + '.csv'``.

    ``samples_selected`` is accepted so that all wrappers share one signature;
    it is not used in this function.

    Returns the scored table.
    """
    corrected = batch_correct(adata_full, method='reg')
    scores = eval_batch_sample(corrected)
    scores['method'] = 'reg'
    # save_path is used as a plain string prefix, so it has to end with a separator
    out_file = save_path+'scores_reg_'+suffix+'.csv'
    scores.to_csv(out_file)
    return scores

def wrapper_combat(adata_full, samples_selected, save_path, suffix='full'):
    """Score the ComBat-corrected data and write the scores to CSV.

    ``batch_correct(adata_full, method='combat')`` is run on the whole object
    and its result is handed straight to ``eval_batch_sample``. The rows are
    tagged with ``method='combat'`` and written to
    ``save_path + 'scores_combat_' + suffix + '.csv'``.

    ``samples_selected`` is accepted so that all wrappers share one signature;
    it is not used in this function.

    Returns the scored table.
    """
    corrected = batch_correct(adata_full, method='combat')
    scores = eval_batch_sample(corrected)
    scores['method'] = 'combat'
    # save_path is used as a plain string prefix, so it has to end with a separator
    out_file = save_path+'scores_combat_'+suffix+'.csv'
    scores.to_csv(out_file)
    return scores

def wrapper_mnn(adata_full, samples_selected, save_path, suffix='full',
                max_cells=1000, random_state_list=None):
    """Score mnnCorrect-corrected data, averaged over several random subsamples.

    For every random state, each selected sample is subset from ``adata_full``,
    reduced with ``sample_cells`` and corrected with
    ``batch_correct(..., method='mnn')``; the per-sample results are then scored
    with ``eval_batch_sample``.

    Two CSV files are written (``save_path`` is used as a plain string prefix,
    so it has to end with a separator):

    * ``scores_mnn_<suffix>.csv``      - one row per (random state, sample)
    * ``scores_mnn_<suffix>_mean.csv`` - per-sample mean over the random states

    Parameters
    ----------
    adata_full : object indexable by a boolean row mask and exposing ``obs['sample']``
    samples_selected : iterable of sample names to evaluate
    save_path : str, output prefix for the CSV files
    suffix : str, inserted into the output file names
    max_cells : int, forwarded to ``sample_cells`` (default 1000, the value
        that used to be hard-coded)
    random_state_list : iterable of seeds forwarded to ``sample_cells``;
        ``None`` selects the five seeds that used to be hard-coded

    Returns
    -------
    The per-sample mean table, with ``method='mnn'`` and a ``sample`` column.
    """
    if random_state_list is None:
        #random_state_list = [123465, 87654, 289, 243, 1234]
        random_state_list = [19885, 1998, 8768, 26998, 243]

    eval_random_state = dict()
    for random_state in random_state_list:
        # fresh dict per random state so results of a previous seed can never leak in
        adata_batch_mnn = dict()
        for sample in samples_selected:
            adata = adata_full[adata_full.obs['sample']==sample,:].copy()
            adata_sampled = sample_cells(adata, random_state=random_state, max_cells=max_cells)
            adata_sampled_batch_ann = batch_correct(adata_sampled, method='mnn')
            adata_batch_mnn[sample] = adata_sampled_batch_ann[sample]
        eval_random_state[random_state] = eval_batch_sample(adata_batch_mnn)

    # stack the per-seed tables; the outer index level carries the random state
    eval_full_batch_mnn = pd.concat(eval_random_state)
    eval_full_batch_mnn['random_state'] = list(eval_full_batch_mnn.index.get_level_values(0))
    eval_full_batch_mnn.index = range(eval_full_batch_mnn.shape[0])
    eval_full_batch_mnn.to_csv(save_path+'scores_mnn_'+suffix+'.csv')

    # average score across random_states
    for score_column in ('divergence_score', 'entropy_score', 'silhouette_score'):
        eval_full_batch_mnn[score_column] = pd.to_numeric(eval_full_batch_mnn[score_column])
    eval_full_batch_mnn = eval_full_batch_mnn.drop(columns='random_state')
    # explicit numeric-only mean: does not rely on np.mean silently dropping
    # non-numeric columns, which depends on the pandas version
    eval_full_batch_mnn_mean = eval_full_batch_mnn.groupby('sample').mean(numeric_only=True)
    eval_full_batch_mnn_mean['method'] = 'mnn'
    eval_full_batch_mnn_mean['sample'] = eval_full_batch_mnn_mean.index
    eval_full_batch_mnn_mean.to_csv(save_path+'scores_mnn_'+suffix+'_mean.csv')
    return eval_full_batch_mnn_mean


In [4]:
# absolute path of the data directory, two levels above the current working directory
wd = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir, "data"))

In [None]:
####################  simulated data  ###################

In [None]:
######  all cell populations shared  ######

In [33]:
# load_data_basic is imported here so that this cell also runs on a fresh kernel;
# the only other import of it sits in the Chevrier section further down.
from loading_and_preprocessing.data_loader import load_data_basic

# global settings
data_path = wd+'/simulated/'
save_path = wd+'/simulated/eval_scores_upsample/'
os.makedirs(save_path, exist_ok=True)  # score CSVs are written under save_path
path = os.path.join(data_path, 'toy_data_gamma_w_index.parquet')

# gather the training splits of both batches per sample, then concatenate once
sample_frames = []
for sample in ['sample1']:
    x1_train, x1_test, x2_train, x2_test = load_data_basic(path,
                                 sample=sample, batch_names=['batch1', 'batch2'], panel=None)
    sample_frames.append(pd.concat([x1_train, x2_train]))
df_full = pd.concat(sample_frames, axis=0)

# index labels are '_'-separated: first field -> batch, second -> sample, last -> cell type
index_fields = [label.split('_') for label in df_full.index]
metadata_batch = [fields[0] for fields in index_fields]
metadata_cell = [fields[-1] for fields in index_fields]
metadata_sample = [fields[1] for fields in index_fields]
df_full['metadata_batch'] = metadata_batch
df_full['metadata_celltype'] = metadata_cell
df_full['metadata_sample'] = metadata_sample
# drop every column that contains a missing value, then discard the string index
df_full = df_full.dropna(axis=1)
df_full = df_full.reset_index(drop = True)

# np.unique replaces sp.unique, a deprecated NumPy alias in the SciPy root namespace
samples_selected = np.unique(df_full['metadata_sample'])
adata_full = convert_to_ann(df_full, sample_col_name = "metadata_sample", batch_col_name="metadata_batch",
                  celltype_col_name = 'metadata_celltype')
adata_full.obs_names_make_unique()

Transforming to str index.


x1 shape (12084, 20)
x2 shape (12084, 20)


In [35]:
# score the raw data and every baseline correction on the fully shared simulated data
suffix = 'toy'
eval_full_raw = wrapper_raw(adata_full, samples_selected, save_path, suffix)
eval_full_batch_reg = wrapper_reg(adata_full, samples_selected, save_path, suffix)
eval_full_batch_combat = wrapper_combat(adata_full, samples_selected, save_path, suffix)
eval_full_batch_mnn_mean = wrapper_mnn(adata_full, samples_selected, save_path, suffix)

# merge all baseline scores
# sort=True keeps the alphabetical column order that pandas applied implicitly
# (with a FutureWarning), so the CSV layout is the same on every pandas version
eval_all = pd.concat([eval_full_raw, eval_full_batch_reg, eval_full_batch_combat, eval_full_batch_mnn_mean],
                     sort=True)
eval_all.to_csv(save_path+'scores_baselines_full.csv')
eval_all

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))
... storing 'sample' as categorical
... storing 'batch' as categorical
... storing 'cell_type' as categorical
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))
... storing 'sample' a

Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...
  Computing correction vectors...
  Adjusting variance...
  Applying correction...
MNN correction complete. Gathering output...
Packing AnnData object...
Done.
Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...
  Computing correction vectors...
  Adjusting variance...
  Applying correction...
MNN correction complete. Gathering output...
Packing AnnData object...
Done.
Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...
  Computing correction vectors...
  Adjusting variance...
  Applying correction...
MNN correction complete. Gathering output...
Packing AnnData object...
Done.
Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: p

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,divergence_score,entropy_score,method,sample,silhouette_score
sample1,0.363898,-1,raw,sample1,-0.0945268
sample1,0.0831923,-1,reg,sample1,-0.0361925
sample1,0.0365784,-1,combat,sample1,-0.0710457
sample1,0.0218221,-1,mnn,sample1,-0.0769231


In [None]:
######  some cell populations shared  ######

In [36]:
# load_data_basic is imported here so that this cell also runs on a fresh kernel;
# the only other import of it sits in the Chevrier section further down.
from loading_and_preprocessing.data_loader import load_data_basic

# global settings
data_path = wd+'/simulated/'
save_path = wd+'/simulated/eval_scores_subset_upsample/'
os.makedirs(save_path, exist_ok=True)  # score CSVs are written under save_path
path = os.path.join(data_path, 'toy_data_gamma_w_index_subset.parquet')

# gather the training splits of both batches per sample, then concatenate once
sample_frames = []
for sample in ['sample1']:
    x1_train, x1_test, x2_train, x2_test = load_data_basic(path,
                                 sample=sample, batch_names=['batch1', 'batch2'], panel=None)
    sample_frames.append(pd.concat([x1_train, x2_train]))
df_full = pd.concat(sample_frames, axis=0)

# index labels are '_'-separated: first field -> batch, second -> sample, last -> cell type
index_fields = [label.split('_') for label in df_full.index]
metadata_batch = [fields[0] for fields in index_fields]
metadata_cell = [fields[-1] for fields in index_fields]
metadata_sample = [fields[1] for fields in index_fields]
df_full['metadata_batch'] = metadata_batch
df_full['metadata_celltype'] = metadata_cell
df_full['metadata_sample'] = metadata_sample
# drop every column that contains a missing value, then discard the string index
df_full = df_full.dropna(axis=1)
df_full = df_full.reset_index(drop = True)

# np.unique replaces sp.unique, a deprecated NumPy alias in the SciPy root namespace
samples_selected = np.unique(df_full['metadata_sample'])
adata_full = convert_to_ann(df_full, sample_col_name = "metadata_sample", batch_col_name="metadata_batch",
                  celltype_col_name = 'metadata_celltype')
adata_full.obs_names_make_unique()

Transforming to str index.


x1 shape (9588, 20)
x2 shape (9588, 20)


In [37]:
# score the raw data and every baseline correction on the partially shared simulated data
suffix = 'toysubset'
eval_full_raw = wrapper_raw(adata_full, samples_selected, save_path, suffix)
eval_full_batch_reg = wrapper_reg(adata_full, samples_selected, save_path, suffix)
eval_full_batch_combat = wrapper_combat(adata_full, samples_selected, save_path, suffix)
eval_full_batch_mnn_mean = wrapper_mnn(adata_full, samples_selected, save_path, suffix)

# merge all baseline scores
# sort=True keeps the alphabetical column order that pandas applied implicitly
# (with a FutureWarning), so the CSV layout is the same on every pandas version
eval_all = pd.concat([eval_full_raw, eval_full_batch_reg, eval_full_batch_combat, eval_full_batch_mnn_mean],
                     sort=True)
eval_all.to_csv(save_path+'scores_baselines_full.csv')
eval_all

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))
... storing 'sample' as categorical
... storing 'batch' as categorical
... storing 'cell_type' as categorical
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))
... storing 'sample' a

Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...
  Computing correction vectors...
  Adjusting variance...
  Applying correction...
MNN correction complete. Gathering output...
Packing AnnData object...
Done.
Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...
  Computing correction vectors...
  Adjusting variance...
  Applying correction...
MNN correction complete. Gathering output...
Packing AnnData object...
Done.
Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...
  Computing correction vectors...
  Adjusting variance...
  Applying correction...
MNN correction complete. Gathering output...
Packing AnnData object...
Done.
Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: p

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,divergence_score,entropy_score,method,sample,silhouette_score
sample1,0.529782,0.563564,raw,sample1,-0.0993585
sample1,0.138001,0.670678,reg,sample1,-0.0391927
sample1,0.0498539,0.68209,combat,sample1,-0.101426
sample1,0.0391844,0.683331,mnn,sample1,-0.114747


In [None]:
####################  Chevrier data  ###################

In [7]:
from loading_and_preprocessing.data_loader import load_data_basic
# global settings
data_path = wd+'/Dataset5/'
save_path = wd+'/Dataset5/eval_scores_upsample/'
os.makedirs(save_path, exist_ok=True)  # score CSVs are written under save_path

path = os.path.join(data_path, 'chevrier_data_pooled_full_panels.parquet')

# gather the training splits of both batches per sample, then concatenate once
# (avoids re-copying the growing frame on every loop iteration)
sample_frames = []
for sample in ['sample5','sample75','sample65']:
    x1_train, x1_test, x2_train, x2_test = load_data_basic(path,
                                 sample=sample, batch_names=['batch1', 'batch3'], panel=None)
    sample_frames.append(pd.concat([x1_train, x2_train]))
df_full = pd.concat(sample_frames, axis=0)

# index labels are '_'-separated: first field -> batch, second -> sample, last -> cell type
index_fields = [label.split('_') for label in df_full.index]
metadata_batch = [fields[0] for fields in index_fields]
metadata_cell = [fields[-1] for fields in index_fields]
metadata_sample = [fields[1] for fields in index_fields]
df_full['metadata_batch'] = metadata_batch
df_full['metadata_celltype'] = metadata_cell
df_full['metadata_sample'] = metadata_sample
# drop every column that contains a missing value, then discard the string index
df_full = df_full.dropna(axis=1)
df_full = df_full.reset_index(drop = True)

x1 shape (26541, 11)
x2 shape (26541, 11)
x1 shape (6064, 11)
x2 shape (6064, 11)
x1 shape (15877, 11)
x2 shape (15877, 11)


In [9]:
# for a quick run subset the data to 3 selected samples
samples_selected = ['sample5','sample75','sample65']

# convert the data frame assembled in the loading cell and make the obs names unique
adata_full = convert_to_ann(df_full,
                            celltype_col_name = 'metadata_celltype',
                            batch_col_name="metadata_batch",
                            sample_col_name = "metadata_sample")
adata_full.obs_names_make_unique()

# keep only the rows that belong to one of the selected samples
in_selection = adata_full.obs['sample'].isin(samples_selected)
adata_full = adata_full[in_selection,:].copy()

Transforming to str index.


In [10]:
# score the raw data and every baseline correction on the Chevrier samples
suffix='full'
eval_full_raw = wrapper_raw(adata_full, samples_selected, save_path, suffix)
eval_full_batch_reg = wrapper_reg(adata_full, samples_selected, save_path, suffix)
eval_full_batch_combat = wrapper_combat(adata_full, samples_selected, save_path, suffix)
eval_full_batch_mnn_mean = wrapper_mnn(adata_full, samples_selected, save_path, suffix)

# merge all baseline scores
# sort=True keeps the alphabetical column order that pandas applied implicitly
# (with a FutureWarning), so the CSV layout is the same on every pandas version
eval_all = pd.concat([eval_full_raw, eval_full_batch_reg, eval_full_batch_combat, eval_full_batch_mnn_mean],
                     sort=True)
eval_all = eval_all.sort_values(by=['sample', 'method'])
eval_all.to_csv(save_path+'scores_baselines_full.csv')
eval_all

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/umap/utils.py", line 409:
@numba.njit(parallel=True)
def build_candidates(current_graph, n_vertices, n_neighbors, max_candidates, rng_state):
^

  current_graph, n_vertices, n_neighbors, max_candidates, rng_state
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))
The keyword argument 'parall


File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/scanpy/preprocessing/_combat.py", line 305:
def _it_sol(s_data, g_hat, d_hat, g_bar, t2, a, b, conv=0.0001) -> Tuple[float, float]:
    <source elided>
    change = 1
    count = 0
    ^

  state.func_ir.loc))
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/scanpy/preprocessing/_combat.py", line 305:
def _it_sol(s_data, g_hat, d_hat, g_bar, t2, a, b, conv=0.0001) -> Tuple[float, float]:
    <source elided>
    change = 1
    count = 0
    ^

... storing 'sample' as categorical
... storing 'batch' as categorical
... storing 'cell_type' as categorical
... storing 'sample' as categorical
... storing 'batch' 


File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/mnnpy/utils.py", line 103:
@jit(float32[:, :](float32[:, :], float32[:, :], int32[:], int32[:], float32[:, :], float32))
def compute_correction(data1, data2, mnn1, mnn2, data2_or_raw2, sigma):
^

  state.func_ir.loc))
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/mnnpy/utils.py", line 103:
@jit(float32[:, :](float32[:, :], float32[:, :], int32[:], int32[:], float32[:, :], float32))
def compute_correction(data1, data2, mnn1, mnn2, data2_or_raw2, sigma):
^

Compilation is falling back to object mode WITH looplifting enabled because Function "adjust_s_variance" failed type inference due to: Untyped global 

Performing cosine normalization...


Compilation is falling back to object mode WITH looplifting enabled because Function "l2_norm" failed type inference due to: Invalid use of Function(<function norm at 0x1062afb00>) with argument(s) of type(s): (axis=Literal[int](1), x=array(float32, 2d, A))
 * parameterized
In definition 0:
    TypeError: norm_impl() got an unexpected keyword argument 'x'
    raised from /Users/joannaf/anaconda3/envs/dl/lib/python3.7/site-packages/numba/typing/templates.py:517
In definition 1:
    TypeError: norm_impl() got an unexpected keyword argument 'x'
    raised from /Users/joannaf/anaconda3/envs/dl/lib/python3.7/site-packages/numba/typing/templates.py:517
This error is usually caused by passing an argument of a type that is unsupported by the named function.
[1] During: resolving callee type: Function(<function norm at 0x1062afb00>)
[2] During: typing of call at /Users/joannaf/anaconda3/envs/dl/lib/python3.7/site-packages/mnnpy/utils.py (16)


File "../../../../../../../anaconda3/envs/dl/lib/py

Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...


Compilation is falling back to object mode WITHOUT looplifting enabled because Function "find_mutual_nn" failed type inference due to: non-precise type pyobject
[1] During: typing of argument at /Users/joannaf/anaconda3/envs/dl/lib/python3.7/site-packages/mnnpy/utils.py (94)

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/mnnpy/utils.py", line 94:
def find_mutual_nn(data1, data2, k1, k2, n_jobs):
    <source elided>
    mutual_2 = []
    for index_2 in range(data2.shape[0]):
    ^

  @jit((float32[:, :], float32[:, :], int8, int8, int8))

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/mnnpy/utils.py", line 94:
def find_mutual_nn(data1, data2, k1, k2, n_jobs):
    <source elided>
    mutual_2 = []
    for index_2 in range(data2.shape[0]):
    ^

  state.func_ir.loc))
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.

  Computing correction vectors...


Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'mnn2' of function 'compute_correction'.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/mnnpy/utils.py", line 107:
def compute_correction(data1, data2, mnn1, mnn2, data2_or_raw2, sigma):
    <source elided>
    vect_reduced = np.zeros((data2.shape[0], vect.shape[1]), dtype=np.float32)
    for index, ve in zip(mnn2, vect):
    ^

Compilation is falling back to object mode WITHOUT looplifting enabled because Function compute_correction failed at nopython mode lowering due to: iterating over 2D array

File "../../../../../../../anaconda3/envs/dl/lib/python3.7/site-packages/mnnpy/utils.py", line 107:
def compute_correction(data1, data2, mnn1, mnn2, data2_or_raw2, sigma):
    <source elided>
    vect_reduced = np.zero

  Adjusting variance...
  Applying correction...
MNN correction complete. Gathering output...
Packing AnnData object...
Done.
Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...
  Computing correction vectors...
  Adjusting variance...
  Applying correction...
MNN correction complete. Gathering output...
Packing AnnData object...
Done.
Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...


IndexError: index 1000 is out of bounds for axis 0 with size 1000

In [31]:
# Re-merge the Chevrier baseline scores into a separately named CSV.
# NOTE(review): this cell only reads eval_full_raw, eval_full_batch_reg,
# eval_full_batch_combat and eval_full_batch_mnn_mean from the kernel; all four
# must have been produced by a successful run of the evaluation cell first.
# sort=True keeps the alphabetical column order that pandas applied implicitly
# (with a FutureWarning), so the CSV layout is the same on every pandas version.
eval_all = pd.concat([eval_full_raw, eval_full_batch_reg, eval_full_batch_combat, eval_full_batch_mnn_mean],
                     sort=True)
eval_all = eval_all.sort_values(by=['sample', 'method'])
eval_all.to_csv(save_path+'scores_baselines_full_upsample.csv')
eval_all

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,divergence_score,entropy_score,method,sample,silhouette_score
sample5,0.462772,0.536786,combat,sample5,0.212167
sample5,0.771196,0.616088,mnn,sample5,0.183975
sample5,1.16913,0.514197,raw,sample5,0.045573
sample5,1.1116,0.288478,reg,sample5,0.21912
sample65,0.414426,0.554443,combat,sample65,0.235609
sample65,0.508363,0.647851,mnn,sample65,0.11293
sample65,1.00274,0.543765,raw,sample65,0.0782595
sample65,0.794367,0.272582,reg,sample65,0.168349
sample75,0.358697,0.556734,combat,sample75,-0.0254128
sample75,0.820883,0.609393,mnn,sample75,0.000838843
