### This notebook prepares a single, concatenated AnnData object, using increasing samples of all files

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import scanpy as sc
from os.path import join, exists
from os import listdir
import anndata
import scipy
import numpy as np
import sys
import pandas as pd

import utils

# convert counts into float32
# Convenience method for computing the size of objects
def print_size_in_MB(x):
    """Return the size reported by ``x.__sizeof__()`` as a string in MB.

    The byte count is divided by 1e6 (decimal megabytes) and rendered with
    three significant digits, e.g. ``'1.5 MB'``. Only the object's own
    ``__sizeof__`` is consulted; referenced objects are not followed.
    """
    megabytes = x.__sizeof__() / 1e6
    return '%s MB' % format(megabytes, '.3')

def print_size_in_MB_sparse_matrix(a):
    """Return the memory used by the arrays of a sparse matrix, as a string.

    The size is the sum of ``nbytes`` over the ``data``, ``indices`` and
    ``indptr`` arrays (the three arrays of a CSR/CSC matrix). Attributes that
    the object does not have are skipped, so formats that only expose
    ``data`` still report the bytes held by their values.

    The previous implementation divided ``a.data.size`` -- the number of
    stored elements, not bytes -- by 1024**2, which under-reported the size
    by the item size and ignored the index arrays.

    Parameters
    ----------
    a : object exposing numpy arrays as ``data`` / ``indices`` / ``indptr``
        Typically a ``scipy.sparse`` CSR or CSC matrix.

    Returns
    -------
    str
        Size with three significant digits followed by ``' MB'``. The byte
        count is divided by 1024**2, as in the original code.
    """
    component_names = ('data', 'indices', 'indptr')
    total_bytes = sum(getattr(a, name).nbytes
                      for name in component_names
                      if hasattr(a, name))
    return '{:.3} MB'.format(total_bytes / (1024 ** 2))

import warnings
warnings.filterwarnings("ignore")

### Build the dataset combinations: all datasets, the Chen datasets alone, and the Chen datasets plus each other dataset in turn.

In [3]:
# First entry: every dataset; second entry: the three Chen datasets only.
combinations = [
    ['Hackney', 'Roska', 'Hafler', 'Wong', 'Scheetz', 'Chen_b', 'Chen_c', 'Sanes', 'Chen_a'],
    ['Chen_b', 'Chen_c', 'Chen_a'],
]
dataset_codes = ['all', 'Chen']

# Add one combination per non-Chen dataset: the Chen datasets plus that dataset.
for extra_name in combinations[0]:
    if 'Chen' not in extra_name:
        print(extra_name)
        combinations.append(combinations[1] + [extra_name])
        dataset_codes.append('Chen+%s' % extra_name)

# Show each code next to the datasets it stands for.
for code, members in zip(dataset_codes, combinations):
    print(code, members)


Hackney
Roska
Hafler
Wong
Scheetz
Sanes
all ['Hackney', 'Roska', 'Hafler', 'Wong', 'Scheetz', 'Chen_b', 'Chen_c', 'Sanes', 'Chen_a']
Chen ['Chen_b', 'Chen_c', 'Chen_a']
Chen+Hackney ['Chen_b', 'Chen_c', 'Chen_a', 'Hackney']
Chen+Roska ['Chen_b', 'Chen_c', 'Chen_a', 'Roska']
Chen+Hafler ['Chen_b', 'Chen_c', 'Chen_a', 'Hafler']
Chen+Wong ['Chen_b', 'Chen_c', 'Chen_a', 'Wong']
Chen+Scheetz ['Chen_b', 'Chen_c', 'Chen_a', 'Scheetz']
Chen+Sanes ['Chen_b', 'Chen_c', 'Chen_a', 'Sanes']


In [4]:
import gc

# Build one concatenated .h5ad input per dataset combination.
#
# For every (combination, code) pair the cell:
#   1. loads the datasets through utils.get_datasets and saves them as up to
#      three intermediate "_partN.h5ad" files,
#   2. reads the parts back, keeps only cells of the requested datasets and
#      concatenates them,
#   3. adds the 'batch.merged', 'donor' and 'batch_donor_dataset' columns,
#   4. drops (donor, dataset, batch) groups that are too small,
#   5. writes the final object and removes the intermediate part files.

# When True, outputs that already exist on disk are rebuilt instead of skipped.
overwrite = False

# A (donor, dataset, batch) group is kept only if it has MORE than this many cells.
MIN_CELLS_PER_BATCH = 100


def _write_part(names, part_path, part_index, code_n_cells):
    """Load ``names`` with ``utils.get_datasets`` and save them to ``part_path``.

    Parameters
    ----------
    names : list of str
        Dataset names passed to ``utils.get_datasets``.
    part_path : str
        Destination ``.h5ad`` file (written with lzf compression).
    part_index : int
        1-based part number, used only for log messages.
    code_n_cells : str
        Forwarded unchanged to ``utils.get_datasets``.
    """
    print('loading', names)
    part = utils.get_datasets(names, code_n_cells=code_n_cells)

    # Store the cluster labels as integers when the column is present.
    if 'RNA_snn_res.0.8' in part.obs:
        part.obs['RNA_snn_res.0.8'] = part.obs['RNA_snn_res.0.8'].astype(int)

    print('ad%d' % part_index)
    print(part)
    print(part.obs.index)
    print('loading datasets %d done...' % part_index)
    print(part.obs.dataset.value_counts())

    # Only the requested datasets may end up in the part file.
    part = part[part.obs.dataset.isin(set(names)), :]
    part.write(part_path, compression='lzf')
    print(part_path)


# filename -> donor lookup; read lazily so nothing is loaded when every
# output already exists.
donor_by_filename = None

for n_sample_per_batch in [500]:  # None selects the '_all' output naming
    if n_sample_per_batch not in (None, 500):
        continue

    # Suffix handed to utils.get_datasets ('' when no sampling is requested).
    code_n_cells = '' if n_sample_per_batch is None else '_' + str(n_sample_per_batch)
    # Suffix used in the output file name.
    code_output = '_all' if n_sample_per_batch is None else '_' + str(n_sample_per_batch)

    print('# of cells (input argument)', n_sample_per_batch)

    for dataset_names_subset, dataset_code in zip(combinations, dataset_codes):

        output_path = '../../data/integration_march_2021/input/input%s_cells_%s.h5ad' % (code_output, dataset_code)
        print(exists(output_path), output_path)

        if exists(output_path) and not overwrite:
            continue

        print(dataset_code, dataset_names_subset)

        if dataset_code == 'all':
            # The full combination is loaded in three chunks
            # (presumably to limit peak memory -- TODO confirm).
            names1 = dataset_names_subset[:4]
            names2 = dataset_names_subset[4:-1]
            names3 = dataset_names_subset[-1:]
        else:
            # Every other combination is loaded in one go. Using the whole
            # list (rather than its first four entries) keeps longer subsets
            # from being silently truncated.
            names1, names2, names3 = list(dataset_names_subset), [], []
        print(names1)
        print(names2)
        print(names3)

        part_specs = [
            (part_index, names, output_path.replace('.h5ad', '_part%d.h5ad' % part_index))
            for part_index, names in enumerate((names1, names2, names3), start=1)
        ]

        # Step 1: write the part files that are needed and not yet on disk.
        for part_index, names, part_path in part_specs:
            if len(names) > 0 and not exists(part_path):
                _write_part(names, part_path, part_index, code_n_cells)
                gc.collect()

        # Step 2: read the parts back; only the requested datasets may remain.
        wanted = set(dataset_names_subset)
        parts = []
        for part_index, names, part_path in part_specs:
            if len(names) == 0:
                continue
            print(part_path)
            part = sc.read_h5ad(part_path)
            part = part[part.obs['dataset'].isin(wanted)]
            print(part.obs.dataset.value_counts())
            parts.append(part)

        if len(parts) == 0:
            print('no datasets to load for', dataset_code, '- skipping')
            continue

        gc.collect()
        print('concatenating...')
        # anndata.concat is called with its defaults; per the anndata docs the
        # default join is 'inner', i.e. only variables present in all parts
        # are kept. A single part is copied so that it is a real object
        # rather than a view before columns are added to .obs.
        ad_final = anndata.concat(parts) if len(parts) > 1 else parts[0].copy()
        del parts, part
        gc.collect()
        print('done...')

        print('ad final')
        print(ad_final.shape)

        # Unified batch code: one string label per (dataset, batch) pair.
        dataset_and_batch = ad_final.obs['dataset'].astype(str) + ':' + ad_final.obs['batch'].astype(str)
        ad_final.obs['batch.merged'] = dataset_and_batch.astype('category').cat.codes.astype(str)

        # Include the donor information (table is read once, on first use).
        if donor_by_filename is None:
            donor = pd.read_csv('data/donor_details.tsv', sep='\t')
            # regex=False: '.' must be removed literally, whatever the pandas
            # default for `regex` is.
            donor['k'] = donor['file'].str.replace('.', '', regex=False).str.replace('h5ad', '', regex=False)
            donor['dataset'] = donor['k'].str.split('/').str[1]
            donor['filename'] = donor['k'].str.split('/').str[2]
            donor_by_filename = donor[['donor', 'filename']].set_index('filename')['donor'].to_dict()
        ad_final.obs['donor'] = ad_final.obs['filename'].map(donor_by_filename)

        ad_final.obs['batch_donor_dataset'] = (
            ad_final.obs['donor'].astype(str)
            + ':' + ad_final.obs['dataset'].astype(str)
            + ':' + ad_final.obs['batch'].astype(str)
        )

        print('before batch filter (n=%d)' % MIN_CELLS_PER_BATCH)
        print(ad_final.shape)
        group_sizes = ad_final.obs['batch_donor_dataset'].value_counts().to_dict()
        keep = ad_final.obs['batch_donor_dataset'].map(group_sizes) > MIN_CELLS_PER_BATCH
        # Copy so the filtered object is not a view when .obs is modified below.
        ad_final = ad_final[keep, :].copy()
        ad_final.obs['batch_donor_dataset'] = ad_final.obs['batch_donor_dataset'].astype('category')

        print('after batch filter (n=%d)' % MIN_CELLS_PER_BATCH)
        print(ad_final.shape)
        print('saving to output...')
        ad_final.write(output_path, compression='lzf')

        # The intermediate part files are no longer needed.
        for part_index, names, part_path in part_specs:
            if exists(part_path):
                os.remove(part_path)

        print('done...')


# of cells (input argument) 500
False ../../data/integration_march_2021/input/input_500_cells_all.h5ad
all ['Hackney', 'Roska', 'Hafler', 'Wong', 'Scheetz', 'Chen_b', 'Chen_c', 'Sanes', 'Chen_a']
['Hackney', 'Roska', 'Hafler', 'Wong']
['Scheetz', 'Chen_b', 'Chen_c', 'Sanes']
['Chen_a']
Hackney _500
Hackney_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Hackney_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (5499, 36974)
filtering by number of min_cells with gene
after (5499, 19090)
['Hackney', 'Roska', 'Hafler', 'Wong']
Series([], Name: dataset, dtype: int64)
Roska _500
Roska_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Roska_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (10109, 12385)
filtering by number of min_cells with gene
after (10109, 12310)
['Hackney', 'Roska', 'Hafler', 'Wong']
Series([], Name: dataset, dtype: int64)
Hafler _500
Hafler_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Hafler_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (2630, 13913)
filtering by number of min_cells with gene
after (2630, 10350)
['Hackney', 'Roska', 'Hafler', 'Wong']
Series([], Name: dataset, dtype: int64)
Wong _500
Wong_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Wong_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (2500, 18769)
filtering by number of min_cells with gene
after (2500, 11280)
['Hackney', 'Roska', 'Hafler', 'Wong']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...


Trying to set attribute `.obs` of view, copying.


names upon integration
Index(['CELL3980150-0:Hackney:0', 'CELL3980173-0:Hackney:0',
       'CELL3980184-0:Hackney:0', 'CELL3980226-0:Hackney:0',
       'CELL3980300-0:Hackney:0', 'CELL3980311-0:Hackney:0',
       'CELL3980366-0:Hackney:0', 'CELL3980412-0:Hackney:0',
       'CELL3980447-0:Hackney:0', 'CELL3980462-0:Hackney:0',
       ...
       'TTGAACGAGTGTCCCG-1-4:Wong:4', 'TTGAACGTCCAGAGGA-1-4:Wong:4',
       'TTGCCGTAGGGCTTCC-1-4:Wong:4', 'TTGGAACTCATGCAAC-1-4:Wong:4',
       'TTGGCAAAGATGGGTC-1-4:Wong:4', 'TTGTAGGCATCACCCT-1-4:Wong:4',
       'TTTACTGAGACGCAAC-1-4:Wong:4', 'TTTACTGTCAAAGTAG-1-4:Wong:4',
       'TTTATGCCATAACCTG-1-4:Wong:4', 'TTTGCGCTCGGCGCTA-1-4:Wong:4'],
      dtype='object', length=20738)
ad1
loading datasets 1 done...
Roska      10109
Hackney     5499
Hafler      2630
Wong        2500
Name: dataset, dtype: int64


... storing 'batch' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'dataset' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'filename' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'scpred_prediction' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'cell.type' as categorical


../../data/integration_march_2021/input/input_500_cells_all_part1.h5ad
loading ['Scheetz', 'Chen_b', 'Chen_c', 'Sanes']
Scheetz _500
Scheetz_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Scheetz_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (2582, 16782)
filtering by number of min_cells with gene
after (2582, 13729)
['Scheetz', 'Chen_b', 'Chen_c', 'Sanes']
Series([], Name: dataset, dtype: int64)
Chen_b _500
Chen_b_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_b_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (7500, 23428)
filtering by number of min_cells with gene
after (7500, 16958)
['Scheetz', 'Chen_b', 'Chen_c', 'Sanes']
Series([], Name: dataset, dtype: int64)
Chen_c _500
Chen_c_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_c_500.h5ad
after loading (10000, 23797)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (10000, 18176)
['Scheetz', 'Chen_b', 'Chen_c', 'Sanes']
Series([], Name: dataset, dtype: int64)
Sanes _500
Sanes_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Sanes_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (9020, 5682)
filtering by number of min_cells with gene
after (9020, 5630)
['Scheetz', 'Chen_b', 'Chen_c', 'Sanes']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...


Trying to set attribute `.obs` of view, copying.


names upon integration
Index(['AAACGAACAGGGACTA-1-0:Scheetz:0', 'AAACGAAGTTCTCCAC-1-0:Scheetz:0',
       'AAACGCTGTGGTTTGT-1-0:Scheetz:0', 'AAAGGGCCAAGGTCTT-1-0:Scheetz:0',
       'AAAGGGCCAGTGTGCC-1-0:Scheetz:0', 'AAAGTGATCTTGCAGA-1-0:Scheetz:0',
       'AAATGGAGTCGTACAT-1-0:Scheetz:0', 'AACACACAGATTTGCC-1-0:Scheetz:0',
       'AACAGGGAGTGCACTT-1-0:Scheetz:0', 'AACCACATCATCGGGC-1-0:Scheetz:0',
       ...
       'TTGGTTTTCTGGTCAA-1-18:Sanes:18', 'TTGTGGATCGCTATTT-1-18:Sanes:18',
       'TTGTGTTTCGCAGATT-1-18:Sanes:18', 'TTGTTTGTCAGAGCGA-1-18:Sanes:18',
       'TTTACCATCTCACTCG-1-18:Sanes:18', 'TTTACTGCAAGGTCGA-1-18:Sanes:18',
       'TTTAGTCTCGGAGTGA-1-18:Sanes:18', 'TTTCACACAAGAAATC-1-18:Sanes:18',
       'TTTCCTCCAGGGTCTC-1-18:Sanes:18', 'TTTGTTGAGGAACATT-1-18:Sanes:18'],
      dtype='object', length=29102)
ad2
AnnData object with n_obs × n_vars = 29102 × 4703
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'RNA_snn_res.0.8', 'seurat_clusters', 'scpred_AC', 'scpred_Astrocyte', '

... storing 'scpred_prediction' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'dataset' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'filename' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'batch' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'cell.type' as categorical


../../data/integration_march_2021/input/input_500_cells_all_part2.h5ad
../../data/integration_march_2021/input/input_500_cells_all.h5ad
../../data/integration_march_2021/input/input_500_cells_all_part3.h5ad
loading ['Chen_a']
Chen_a _500
Chen_a_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_a_500.h5ad
after loading (20000, 15903)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (20000, 14453)
['Chen_a']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...
names upon integration
Index(['AAAGGGCGTGATTAGA-1_1-0:Chen_a:0', 'AATGGAAGTTACCCAA-1_1-0:Chen_a:0',
       'ACAAAGATCACTGGTA-1_1-0:Chen_a:0', 'ACCAAACTCTAAACGC-1_1-0:Chen_a:0',
       'ACGTACAGTAAGGCTG-1_1-0:Chen_a:0', 'ACTCTCGAGGTTGAGC-1_1-0:Chen_a:0',
       'ACTTTCACACGTATAC-1_1-0:Chen_a:0', 'AGCCAATCATATGAAG-1_1-0:Chen_a:0',
       'AGTGTTGTCGTAATGC-1_1-0:Chen_a:0', 'ATCACGAAGCATGCGA-1_1-0:Chen_a:0',
       ...
       'TTGTTCAAGACGTCCC-1-39:Chen_a:39', 'TTGTTGTAGGGCATGT-1-39:Chen_a:39',
       'TTTACGTAGCCTCTCT-1-39:Chen_a:39', 'TTTACGTTCCTAGAGT-1-39:Chen_a:39',
       'TTTACGTTCTCCGAAA-1-39:Chen_a:39', 'TTTACTGAGTACAGAT-1-39:Chen_a:39',
       'TTTATGCGTACGAGCA-1-39:Chen_a:39', 'TTTATGCGTGGCCCAT-1-39:Chen_a:39',
       'TTTGATCCATCGAAGG-1-39:Chen_a:39', 'TTTGGAGGTTCCTACC-1-39:Chen_a:39'],
      dtype='object', length=20000)
ad2
AnnData object with n_obs × n_vars = 

Trying to set attribute `.obs` of view, copying.


done...
ad final
(69840, 3848)
before batch filter (n=100)
(69840, 3848)


... storing 'batch' as categorical
... storing 'dataset' as categorical
... storing 'filename' as categorical
... storing 'batch.merged' as categorical
... storing 'donor' as categorical


after batch filter (n=100)
(69632, 3848)
saving to output...
done...
False ../../data/integration_march_2021/input/input_500_cells_Chen.h5ad
Chen ['Chen_b', 'Chen_c', 'Chen_a']
['Chen_b', 'Chen_c', 'Chen_a']
[]
[]
Chen_b _500
Chen_b_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_b_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (7500, 23428)
filtering by number of min_cells with gene
after (7500, 16958)
['Chen_b', 'Chen_c', 'Chen_a']
Series([], Name: dataset, dtype: int64)
Chen_c _500
Chen_c_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_c_500.h5ad
after loading (10000, 23797)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (10000, 18176)
['Chen_b', 'Chen_c', 'Chen_a']
Series([], Name: dataset, dtype: int64)
Chen_a _500
Chen_a_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_a_500.h5ad
after loading (20000, 15903)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (20000, 14453)
['Chen_b', 'Chen_c', 'Chen_a']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...


Trying to set attribute `.obs` of view, copying.


names upon integration
Index(['AAAGGATTCTTGCAGA-1-0:Chen_b:0', 'AAAGGGCGTCTGGTTA-1-0:Chen_b:0',
       'AAAGTGACATGTGACT-1-0:Chen_b:0', 'AAATGGACACCTTCGT-1-0:Chen_b:0',
       'AAATGGACATCCCACT-1-0:Chen_b:0', 'AACAACCAGACTTCAC-1-0:Chen_b:0',
       'AACAACCTCCGTGCGA-1-0:Chen_b:0', 'AACCAACGTGTTCATG-1-0:Chen_b:0',
       'AACCATGTCTGAGGTT-1-0:Chen_b:0', 'AACGGGAAGGTGCTGA-1-0:Chen_b:0',
       ...
       'TTGTTCAAGACGTCCC-1-39:Chen_a:39', 'TTGTTGTAGGGCATGT-1-39:Chen_a:39',
       'TTTACGTAGCCTCTCT-1-39:Chen_a:39', 'TTTACGTTCCTAGAGT-1-39:Chen_a:39',
       'TTTACGTTCTCCGAAA-1-39:Chen_a:39', 'TTTACTGAGTACAGAT-1-39:Chen_a:39',
       'TTTATGCGTACGAGCA-1-39:Chen_a:39', 'TTTATGCGTGGCCCAT-1-39:Chen_a:39',
       'TTTGATCCATCGAAGG-1-39:Chen_a:39', 'TTTGGAGGTTCCTACC-1-39:Chen_a:39'],
      dtype='object', length=37500)
ad1
loading datasets 1 done...
Chen_a    20000
Chen_c    10000
Chen_b     7500
Name: dataset, dtype: int64


... storing 'batch' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'dataset' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'filename' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'scpred_prediction' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'cell.type' as categorical


../../data/integration_march_2021/input/input_500_cells_Chen_part1.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen_part3.h5ad


Trying to set attribute `.obs` of view, copying.


Chen_a    20000
Chen_c    10000
Chen_b     7500
Name: dataset, dtype: int64
concatenating...
done...
ad final
(37500, 13144)


Trying to set attribute `.obs` of view, copying.


before batch filter (n=100)
(37500, 13144)


... storing 'batch.merged' as categorical
... storing 'donor' as categorical


after batch filter (n=100)
(37500, 13144)
saving to output...
done...
False ../../data/integration_march_2021/input/input_500_cells_Chen+Hackney.h5ad
Chen+Hackney ['Chen_b', 'Chen_c', 'Chen_a', 'Hackney']
['Chen_b', 'Chen_c', 'Chen_a', 'Hackney']
[]
[]
Chen_b _500
Chen_b_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_b_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (7500, 23428)
filtering by number of min_cells with gene
after (7500, 16958)
['Chen_b', 'Chen_c', 'Chen_a', 'Hackney']
Series([], Name: dataset, dtype: int64)
Chen_c _500
Chen_c_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_c_500.h5ad
after loading (10000, 23797)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (10000, 18176)
['Chen_b', 'Chen_c', 'Chen_a', 'Hackney']
Series([], Name: dataset, dtype: int64)
Chen_a _500
Chen_a_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_a_500.h5ad
after loading (20000, 15903)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (20000, 14453)
['Chen_b', 'Chen_c', 'Chen_a', 'Hackney']
Series([], Name: dataset, dtype: int64)
Hackney _500
Hackney_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Hackney_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (5499, 36974)
filtering by number of min_cells with gene
after (5499, 19090)
['Chen_b', 'Chen_c', 'Chen_a', 'Hackney']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...


Trying to set attribute `.obs` of view, copying.


names upon integration
Index(['AAAGGATTCTTGCAGA-1-0:Chen_b:0', 'AAAGGGCGTCTGGTTA-1-0:Chen_b:0',
       'AAAGTGACATGTGACT-1-0:Chen_b:0', 'AAATGGACACCTTCGT-1-0:Chen_b:0',
       'AAATGGACATCCCACT-1-0:Chen_b:0', 'AACAACCAGACTTCAC-1-0:Chen_b:0',
       'AACAACCTCCGTGCGA-1-0:Chen_b:0', 'AACCAACGTGTTCATG-1-0:Chen_b:0',
       'AACCATGTCTGAGGTT-1-0:Chen_b:0', 'AACGGGAAGGTGCTGA-1-0:Chen_b:0',
       ...
       'CELL3965982-10:Hackney:10', 'CELL3965997-10:Hackney:10',
       'CELL3966000-10:Hackney:10', 'CELL3966028-10:Hackney:10',
       'CELL3966046-10:Hackney:10', 'CELL3966111-10:Hackney:10',
       'CELL3966117-10:Hackney:10', 'CELL3966137-10:Hackney:10',
       'CELL3966149-10:Hackney:10', 'CELL3966180-10:Hackney:10'],
      dtype='object', length=42999)
ad1
loading datasets 1 done...
Chen_a     20000
Chen_c     10000
Chen_b      7500
Hackney     5499
Name: dataset, dtype: int64


... storing 'batch' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'dataset' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'filename' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'scpred_prediction' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'cell.type' as categorical


../../data/integration_march_2021/input/input_500_cells_Chen+Hackney_part1.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Hackney.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Hackney_part3.h5ad


Trying to set attribute `.obs` of view, copying.


Chen_a     20000
Chen_c     10000
Chen_b      7500
Hackney     5499
Name: dataset, dtype: int64
concatenating...
done...
ad final
(42999, 12451)


Trying to set attribute `.obs` of view, copying.


before batch filter (n=100)
(42999, 12451)


... storing 'batch.merged' as categorical
... storing 'donor' as categorical


after batch filter (n=100)
(42999, 12451)
saving to output...
done...
False ../../data/integration_march_2021/input/input_500_cells_Chen+Roska.h5ad
Chen+Roska ['Chen_b', 'Chen_c', 'Chen_a', 'Roska']
['Chen_b', 'Chen_c', 'Chen_a', 'Roska']
[]
[]
Chen_b _500
Chen_b_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_b_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (7500, 23428)
filtering by number of min_cells with gene
after (7500, 16958)
['Chen_b', 'Chen_c', 'Chen_a', 'Roska']
Series([], Name: dataset, dtype: int64)
Chen_c _500
Chen_c_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_c_500.h5ad
after loading (10000, 23797)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (10000, 18176)
['Chen_b', 'Chen_c', 'Chen_a', 'Roska']
Series([], Name: dataset, dtype: int64)
Chen_a _500
Chen_a_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_a_500.h5ad
after loading (20000, 15903)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (20000, 14453)
['Chen_b', 'Chen_c', 'Chen_a', 'Roska']
Series([], Name: dataset, dtype: int64)
Roska _500
Roska_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Roska_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (10109, 12385)
filtering by number of min_cells with gene
after (10109, 12310)
['Chen_b', 'Chen_c', 'Chen_a', 'Roska']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...


Trying to set attribute `.obs` of view, copying.


names upon integration
Index(['AAAGGATTCTTGCAGA-1-0:Chen_b:0', 'AAAGGGCGTCTGGTTA-1-0:Chen_b:0',
       'AAAGTGACATGTGACT-1-0:Chen_b:0', 'AAATGGACACCTTCGT-1-0:Chen_b:0',
       'AAATGGACATCCCACT-1-0:Chen_b:0', 'AACAACCAGACTTCAC-1-0:Chen_b:0',
       'AACAACCTCCGTGCGA-1-0:Chen_b:0', 'AACCAACGTGTTCATG-1-0:Chen_b:0',
       'AACCATGTCTGAGGTT-1-0:Chen_b:0', 'AACGGGAAGGTGCTGA-1-0:Chen_b:0',
       ...
       'TGGTTAGGTCTAAAGA-1-22:Roska:22', 'TGGTTCCCACTAGTAC-1-22:Roska:22',
       'TGTCCCACAGCCACCA-1-22:Roska:22', 'TGTCCCAGTAGAGCTG-1-22:Roska:22',
       'TGTTCCGGTCAAACTC-1-22:Roska:22', 'TGTTCCGTCTCAAACG-1-22:Roska:22',
       'TTAGGACCACTTAACG-1-22:Roska:22', 'TTCTCCTTCTCTGAGA-1-22:Roska:22',
       'TTGGCAATCCGCGCAA-1-22:Roska:22', 'TTTGGTTTCTGTGCAA-1-22:Roska:22'],
      dtype='object', length=47609)
ad1
loading datasets 1 done...
Chen_a    20000
Roska     10109
Chen_c    10000
Chen_b     7500
Name: dataset, dtype: int64


... storing 'batch' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'dataset' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'filename' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'scpred_prediction' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'cell.type' as categorical


../../data/integration_march_2021/input/input_500_cells_Chen+Roska_part1.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Roska.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Roska_part3.h5ad


Trying to set attribute `.obs` of view, copying.


Chen_a    20000
Roska     10109
Chen_c    10000
Chen_b     7500
Name: dataset, dtype: int64
concatenating...
done...
ad final
(47609, 9743)


Trying to set attribute `.obs` of view, copying.


before batch filter (n=100)
(47609, 9743)


... storing 'orig.ident' as categorical
... storing 'seurat_clusters' as categorical
... storing 'batch.merged' as categorical
... storing 'donor' as categorical


after batch filter (n=100)
(47424, 9743)
saving to output...
done...
False ../../data/integration_march_2021/input/input_500_cells_Chen+Hafler.h5ad
Chen+Hafler ['Chen_b', 'Chen_c', 'Chen_a', 'Hafler']
['Chen_b', 'Chen_c', 'Chen_a', 'Hafler']
[]
[]
Chen_b _500
Chen_b_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_b_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (7500, 23428)
filtering by number of min_cells with gene
after (7500, 16958)
['Chen_b', 'Chen_c', 'Chen_a', 'Hafler']
Series([], Name: dataset, dtype: int64)
Chen_c _500
Chen_c_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_c_500.h5ad
after loading (10000, 23797)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (10000, 18176)
['Chen_b', 'Chen_c', 'Chen_a', 'Hafler']
Series([], Name: dataset, dtype: int64)
Chen_a _500
Chen_a_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_a_500.h5ad
after loading (20000, 15903)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (20000, 14453)
['Chen_b', 'Chen_c', 'Chen_a', 'Hafler']
Series([], Name: dataset, dtype: int64)
Hafler _500
Hafler_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Hafler_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (2630, 13913)
filtering by number of min_cells with gene
after (2630, 10350)
['Chen_b', 'Chen_c', 'Chen_a', 'Hafler']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...


Trying to set attribute `.obs` of view, copying.


names upon integration
Index(['AAAGGATTCTTGCAGA-1-0:Chen_b:0', 'AAAGGGCGTCTGGTTA-1-0:Chen_b:0',
       'AAAGTGACATGTGACT-1-0:Chen_b:0', 'AAATGGACACCTTCGT-1-0:Chen_b:0',
       'AAATGGACATCCCACT-1-0:Chen_b:0', 'AACAACCAGACTTCAC-1-0:Chen_b:0',
       'AACAACCTCCGTGCGA-1-0:Chen_b:0', 'AACCAACGTGTTCATG-1-0:Chen_b:0',
       'AACCATGTCTGAGGTT-1-0:Chen_b:0', 'AACGGGAAGGTGCTGA-1-0:Chen_b:0',
       ...
       'TTGAGTGTCCCATAGA-1-5:Hafler:5', 'TTGGATGGTTTAGAGA-1-5:Hafler:5',
       'TTGTGGATCTAGTGTG-1-5:Hafler:5', 'TTGTTGTAGCCTCACG-1-5:Hafler:5',
       'TTGTTTGCAACTGGTT-1-5:Hafler:5', 'TTTACTGGTACCCAGC-1-5:Hafler:5',
       'TTTACTGTCCTCCACA-1-5:Hafler:5', 'TTTATGCTCCTGATAG-1-5:Hafler:5',
       'TTTCGATCAACTGGTT-1-5:Hafler:5', 'TTTGACTAGTCACTGT-1-5:Hafler:5'],
      dtype='object', length=40130)
ad1
loading datasets 1 done...
Chen_a    20000
Chen_c    10000
Chen_b     7500
Hafler     2630
Name: dataset, dtype: int64


... storing 'batch' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'dataset' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'filename' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'scpred_prediction' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'cell.type' as categorical


../../data/integration_march_2021/input/input_500_cells_Chen+Hafler_part1.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Hafler.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Hafler_part3.h5ad


Trying to set attribute `.obs` of view, copying.


Chen_a    20000
Chen_c    10000
Chen_b     7500
Hafler     2630
Name: dataset, dtype: int64
concatenating...
done...
ad final
(40130, 8762)


Trying to set attribute `.obs` of view, copying.


before batch filter (n=100)
(40130, 8762)


... storing 'batch.merged' as categorical
... storing 'donor' as categorical


after batch filter (n=100)
(40130, 8762)
saving to output...
done...
False ../../data/integration_march_2021/input/input_500_cells_Chen+Wong.h5ad
Chen+Wong ['Chen_b', 'Chen_c', 'Chen_a', 'Wong']
['Chen_b', 'Chen_c', 'Chen_a', 'Wong']
[]
[]
Chen_b _500
Chen_b_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_b_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (7500, 23428)
filtering by number of min_cells with gene
after (7500, 16958)
['Chen_b', 'Chen_c', 'Chen_a', 'Wong']
Series([], Name: dataset, dtype: int64)
Chen_c _500
Chen_c_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_c_500.h5ad
after loading (10000, 23797)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (10000, 18176)
['Chen_b', 'Chen_c', 'Chen_a', 'Wong']
Series([], Name: dataset, dtype: int64)
Chen_a _500
Chen_a_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_a_500.h5ad
after loading (20000, 15903)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (20000, 14453)
['Chen_b', 'Chen_c', 'Chen_a', 'Wong']
Series([], Name: dataset, dtype: int64)
Wong _500
Wong_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Wong_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (2500, 18769)
filtering by number of min_cells with gene
after (2500, 11280)
['Chen_b', 'Chen_c', 'Chen_a', 'Wong']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...


Trying to set attribute `.obs` of view, copying.


names upon integration
Index(['AAAGGATTCTTGCAGA-1-0:Chen_b:0', 'AAAGGGCGTCTGGTTA-1-0:Chen_b:0',
       'AAAGTGACATGTGACT-1-0:Chen_b:0', 'AAATGGACACCTTCGT-1-0:Chen_b:0',
       'AAATGGACATCCCACT-1-0:Chen_b:0', 'AACAACCAGACTTCAC-1-0:Chen_b:0',
       'AACAACCTCCGTGCGA-1-0:Chen_b:0', 'AACCAACGTGTTCATG-1-0:Chen_b:0',
       'AACCATGTCTGAGGTT-1-0:Chen_b:0', 'AACGGGAAGGTGCTGA-1-0:Chen_b:0',
       ...
       'TTGAACGAGTGTCCCG-1-4:Wong:4', 'TTGAACGTCCAGAGGA-1-4:Wong:4',
       'TTGCCGTAGGGCTTCC-1-4:Wong:4', 'TTGGAACTCATGCAAC-1-4:Wong:4',
       'TTGGCAAAGATGGGTC-1-4:Wong:4', 'TTGTAGGCATCACCCT-1-4:Wong:4',
       'TTTACTGAGACGCAAC-1-4:Wong:4', 'TTTACTGTCAAAGTAG-1-4:Wong:4',
       'TTTATGCCATAACCTG-1-4:Wong:4', 'TTTGCGCTCGGCGCTA-1-4:Wong:4'],
      dtype='object', length=40000)
ad1
loading datasets 1 done...
Chen_a    20000
Chen_c    10000
Chen_b     7500
Wong       2500
Name: dataset, dtype: int64


... storing 'batch' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'dataset' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'filename' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'scpred_prediction' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'cell.type' as categorical


../../data/integration_march_2021/input/input_500_cells_Chen+Wong_part1.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Wong.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Wong_part3.h5ad


Trying to set attribute `.obs` of view, copying.


Chen_a    20000
Chen_c    10000
Chen_b     7500
Wong       2500
Name: dataset, dtype: int64
concatenating...
done...
ad final
(40000, 9355)


Trying to set attribute `.obs` of view, copying.


before batch filter (n=100)
(40000, 9355)


... storing 'batch.merged' as categorical
... storing 'donor' as categorical


after batch filter (n=100)
(40000, 9355)
saving to output...
done...
False ../../data/integration_march_2021/input/input_500_cells_Chen+Scheetz.h5ad
Chen+Scheetz ['Chen_b', 'Chen_c', 'Chen_a', 'Scheetz']
['Chen_b', 'Chen_c', 'Chen_a', 'Scheetz']
[]
[]
Chen_b _500
Chen_b_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_b_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (7500, 23428)
filtering by number of min_cells with gene
after (7500, 16958)
['Chen_b', 'Chen_c', 'Chen_a', 'Scheetz']
Series([], Name: dataset, dtype: int64)
Chen_c _500
Chen_c_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_c_500.h5ad
after loading (10000, 23797)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (10000, 18176)
['Chen_b', 'Chen_c', 'Chen_a', 'Scheetz']
Series([], Name: dataset, dtype: int64)
Chen_a _500
Chen_a_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_a_500.h5ad
after loading (20000, 15903)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (20000, 14453)
['Chen_b', 'Chen_c', 'Chen_a', 'Scheetz']
Series([], Name: dataset, dtype: int64)
Scheetz _500
Scheetz_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Scheetz_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (2582, 16782)
filtering by number of min_cells with gene
after (2582, 13729)
['Chen_b', 'Chen_c', 'Chen_a', 'Scheetz']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...


Trying to set attribute `.obs` of view, copying.


names upon integration
Index(['AAAGGATTCTTGCAGA-1-0:Chen_b:0', 'AAAGGGCGTCTGGTTA-1-0:Chen_b:0',
       'AAAGTGACATGTGACT-1-0:Chen_b:0', 'AAATGGACACCTTCGT-1-0:Chen_b:0',
       'AAATGGACATCCCACT-1-0:Chen_b:0', 'AACAACCAGACTTCAC-1-0:Chen_b:0',
       'AACAACCTCCGTGCGA-1-0:Chen_b:0', 'AACCAACGTGTTCATG-1-0:Chen_b:0',
       'AACCATGTCTGAGGTT-1-0:Chen_b:0', 'AACGGGAAGGTGCTGA-1-0:Chen_b:0',
       ...
       'TTGGTTTGTCGAGCTC-1-5:Scheetz:5', 'TTGTGGAGTCACTTCC-1-5:Scheetz:5',
       'TTGTGGATCTAGCAAC-1-5:Scheetz:5', 'TTTACCATCTTACACT-1-5:Scheetz:5',
       'TTTATGCTCCATCCGT-1-5:Scheetz:5', 'TTTCACACATGCCGCA-1-5:Scheetz:5',
       'TTTCACAGTATGCGGA-1-5:Scheetz:5', 'TTTCATGTCTCTTGCG-1-5:Scheetz:5',
       'TTTGGAGCAAAGCTCT-1-5:Scheetz:5', 'TTTGTTGAGGCCTTGC-1-5:Scheetz:5'],
      dtype='object', length=40082)
ad1
loading datasets 1 done...
Chen_a     20000
Chen_c     10000
Chen_b      7500
Scheetz     2582
Name: dataset, dtype: int64


... storing 'batch' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'dataset' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'filename' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'scpred_prediction' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'cell.type' as categorical


../../data/integration_march_2021/input/input_500_cells_Chen+Scheetz_part1.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Scheetz.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Scheetz_part3.h5ad


Trying to set attribute `.obs` of view, copying.


Chen_a     20000
Chen_c     10000
Chen_b      7500
Scheetz     2582
Name: dataset, dtype: int64
concatenating...
done...
ad final
(40082, 10847)


Trying to set attribute `.obs` of view, copying.


before batch filter (n=100)
(40082, 10847)


... storing 'batch.merged' as categorical
... storing 'donor' as categorical


after batch filter (n=100)
(40082, 10847)
saving to output...
done...
False ../../data/integration_march_2021/input/input_500_cells_Chen+Sanes.h5ad
Chen+Sanes ['Chen_b', 'Chen_c', 'Chen_a', 'Sanes']
['Chen_b', 'Chen_c', 'Chen_a', 'Sanes']
[]
[]
Chen_b _500
Chen_b_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_b_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (7500, 23428)
filtering by number of min_cells with gene
after (7500, 16958)
['Chen_b', 'Chen_c', 'Chen_a', 'Sanes']
Series([], Name: dataset, dtype: int64)
Chen_c _500
Chen_c_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_c_500.h5ad
after loading (10000, 23797)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (10000, 18176)
['Chen_b', 'Chen_c', 'Chen_a', 'Sanes']
Series([], Name: dataset, dtype: int64)
Chen_a _500
Chen_a_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Chen_a_500.h5ad
after loading (20000, 15903)
filtering by number of min_cells with gene


Trying to set attribute `.var` of view, copying.


after (20000, 14453)
['Chen_b', 'Chen_c', 'Chen_a', 'Sanes']
Series([], Name: dataset, dtype: int64)
Sanes _500
Sanes_500.h5ad
../../data/integration_march_2021/input/bydataset_500/Sanes_500.h5ad


Trying to set attribute `.var` of view, copying.


after loading (9020, 5682)
filtering by number of min_cells with gene
after (9020, 5630)
['Chen_b', 'Chen_c', 'Chen_a', 'Sanes']
Series([], Name: dataset, dtype: int64)
objects before concatenation
concatenating...


Trying to set attribute `.obs` of view, copying.


names upon integration
Index(['AAAGGATTCTTGCAGA-1-0:Chen_b:0', 'AAAGGGCGTCTGGTTA-1-0:Chen_b:0',
       'AAAGTGACATGTGACT-1-0:Chen_b:0', 'AAATGGACACCTTCGT-1-0:Chen_b:0',
       'AAATGGACATCCCACT-1-0:Chen_b:0', 'AACAACCAGACTTCAC-1-0:Chen_b:0',
       'AACAACCTCCGTGCGA-1-0:Chen_b:0', 'AACCAACGTGTTCATG-1-0:Chen_b:0',
       'AACCATGTCTGAGGTT-1-0:Chen_b:0', 'AACGGGAAGGTGCTGA-1-0:Chen_b:0',
       ...
       'TTGGTTTTCTGGTCAA-1-18:Sanes:18', 'TTGTGGATCGCTATTT-1-18:Sanes:18',
       'TTGTGTTTCGCAGATT-1-18:Sanes:18', 'TTGTTTGTCAGAGCGA-1-18:Sanes:18',
       'TTTACCATCTCACTCG-1-18:Sanes:18', 'TTTACTGCAAGGTCGA-1-18:Sanes:18',
       'TTTAGTCTCGGAGTGA-1-18:Sanes:18', 'TTTCACACAAGAAATC-1-18:Sanes:18',
       'TTTCCTCCAGGGTCTC-1-18:Sanes:18', 'TTTGTTGAGGAACATT-1-18:Sanes:18'],
      dtype='object', length=46520)
ad1
loading datasets 1 done...
Chen_a    20000
Chen_c    10000
Sanes      9020
Chen_b     7500
Name: dataset, dtype: int64


... storing 'batch' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'dataset' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'filename' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'scpred_prediction' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'cell.type' as categorical


../../data/integration_march_2021/input/input_500_cells_Chen+Sanes_part1.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Sanes.h5ad
../../data/integration_march_2021/input/input_500_cells_Chen+Sanes_part3.h5ad


Trying to set attribute `.obs` of view, copying.


Chen_a    20000
Chen_c    10000
Sanes      9020
Chen_b     7500
Name: dataset, dtype: int64
concatenating...
done...
ad final
(46520, 4809)


Trying to set attribute `.obs` of view, copying.


before batch filter (n=100)
(46520, 4809)


... storing 'batch.merged' as categorical
... storing 'donor' as categorical


after batch filter (n=100)
(46497, 4809)
saving to output...
done...
