In [None]:
import sys
sys.path.append('methods/')
import popalign as PA
import importlib

In [None]:
'''
- Load data (example)
- Set LOAD to 'samples' to load one matrix file per sample,
  or to 'screen' to load a single matrix with its barcodes, genes and metadata files
- Any other value raises a ValueError so that later steps never run without a freshly loaded `pop`
'''
LOAD = 'screen'

if LOAD == 'samples':
    # Map each sample name to its own .mtx file; all samples share one genes file.
    mysamples = {
        'CTRL' : 'data/samples/PBMC.mtx',
        'GMCSF_1ng/ml' : 'data/samples/GMCSF.mtx',
        'IFNG_1ng/ml' : 'data/samples/IFNG.mtx',
        'IL2_10ng/ml' : 'data/samples/IL2.mtx',
        'CD40L_20ng/ml' : 'data/samples/CD40L.mtx',
    }
    mygenes = 'data/samples/genes.tsv'
    pop = PA.load_samples(samples=mysamples,
                          genes=mygenes)

elif LOAD == 'screen':
    # One matrix for the whole screen, plus barcodes, genes and a metadata file.
    mymatrix = 'data/screen/drug_screen/pbmcmult4cd3minus.mtx'
    mybarcodes = 'data/screen/drug_screen/barcodes.tsv'
    mygenes = 'data/screen/drug_screen/features.tsv'
    mymetadata = 'data/screen/drug_screen/meta.csv'
    pop = PA.load_screen(matrix=mymatrix,
                         barcodes=mybarcodes,
                         metafile=mymetadata,
                         genes=mygenes)

else:
    # Without this branch a typo in LOAD leaves `pop` undefined (or, in a
    # notebook session, pointing at data from an earlier run).
    raise ValueError("LOAD must be 'samples' or 'screen', got {!r}".format(LOAD))

In [None]:
'''
- Perform column normalization
- Find the best normalization factor
'''
# `pop` is the object returned by PA.load_samples / PA.load_screen in the loading step.
# The return value is not captured, so presumably `pop` is updated in place — TODO confirm.
PA.normalize(pop)

In [None]:
'''
- Plot genes (log cv ~ log cv) and the filtering line
- Run this step multiple times with different offsets to find the best one (usually between .7 and 1.5)
'''
# NOTE(review): "log cv ~ log cv" names the same quantity for both axes — confirm the intended axes.
PA.plot_gene_filter(pop, offset=1)

In [None]:
'''
- Gene-filter the data using the last offset passed to PA.plot_gene_filter in the previous step
'''
# No offset is passed here: run the plotting step with the chosen offset first.
PA.filter(pop)

In [None]:
'''
- Remove red blood cells from the data
'''
# The second argument presumably selects the species — TODO confirm the accepted values.
PA.removeRBC(pop, 'human')

In [None]:
'''
- Generate multiple feature spaces and pick the best one based on reconstruction error
- Run GSEA on each feature
- Generate QC plots
'''
# NOTE(review): presumably ncells is the number of cells used, nfeats the candidate
# numbers of features, nreps the repetitions per candidate and niter the iteration
# cap — confirm against PA.onmf before tuning.
PA.onmf(pop, ncells=5000, nfeats=[5,7], nreps=2, niter=300)

In [None]:
'''
- Build a Gaussian mixture model for each sample
- Type each model's subpopulations
'''
# NOTE(review): ks=(5,20) presumably bounds the number of components tried and nreps
# the number of fits per value; types=None is passed for the typing step — confirm
# these meanings against PA.build_gmms.
PA.build_gmms(pop, ks=(5,20), nreps=3, reg_covar=False, rendering='grouped', types=None)

In [None]:
'''
- Calculate the entropies of all subpopulations for each sample
'''
PA.entropy(pop)

In [None]:
'''
- Align the subpopulations of each sample against the subpopulations of a reference model
'''
# 'CTRL' is one of the sample names defined in the 'samples' loading example.
# NOTE(review): when loading a screen, sample names are not set in this file —
# verify that 'CTRL' exists among them.
PA.align(pop, ref='CTRL', method='conservative')

In [None]:
'''
- Rank each sample against the model of a reference sample
'''
# Uses the same reference name ('CTRL') as the alignment step.
PA.rank(pop, ref='CTRL')

In [None]:
'''
- Build a unique GMM for the samples concatenated together
'''
# Called with the same ks, nreps and reg_covar values as the per-sample PA.build_gmms step.
PA.build_unique_gmm(pop, ks=(5,20), nreps=3, reg_covar=False, groups=None)

In [None]:
'''
- Generate a query plot
'''
PA.plot_query(pop)

In [None]:
'''
- Interactive 3D visualization of the data in feature space
'''
PA.plotfeatures(pop)