# BLADE DEMO

Synopsis
- Load necessary modules
- Example of how to generate simulation data (some variability options; multiple samples)
- Application of BLADE
- Performance evaluation (both cellular fraction estimation and purification)
- Comparison with NNLS and SVR


In [13]:
import sys, os
sys.path.append(os.path.relpath('../python/')) # location of python script

from BLADE import BLADE_framework as BLADE #to be changed
import numpy as np
import itertools
import pickle
from scipy.optimize import nnls

## Generation of simulation data 

- Also provide an overview of the data (e.g., how variable is it, in both log scale and linear scale?)
- Allow users to change setting (e.g., number of cell types and samples)

In [24]:
# Simulation settings (training data). One synthetic dataset is generated for
# every combination of the values below; edit the lists to change the grid.
Ncells = [10]
Ngenes = [500]
Nsamples = [20]
Noises = [0.1, 0.2, 0.5, 0.75, 1, 1.25, 1.5] #try with less no. of noises

# Dedicated, seeded generator so that a run starting without a cache file
# always simulates the same data, without touching NumPy's global random state.
# Datasets already present in the cache are reused as-is and draw nothing.
SIM_SEED = 0
sim_rng = np.random.RandomState(SIM_SEED)

# Datasets are cached on disk so that re-running the cell reuses earlier draws.
simfile = '../simulationdata.pickle'
if not os.path.exists(simfile):
    Synthetic_data = dict()
else:
    # NOTE: unpickling can execute arbitrary code; only load a cache file that
    # was written by this notebook.
    with open(simfile, 'rb') as fp:
        Synthetic_data = pickle.load(fp)

for Ncell, Ngene, Nsample, Noise in itertools.product(
    Ncells, Ngenes, Nsamples, Noises,
            ):

    # Dataset key: '<Ncell>_<Ngene>_<Nsample>_<Noise>'
    name = str(Ncell) +'_'+ str(Ngene) +'_'+ str(Nsample) +'_'+ str(Noise)

    # Only simulate settings that are not in the cache yet.
    if name not in Synthetic_data:
        # Coef: (Ncell, Nsample); each column is a Dirichlet draw, so it sums to 1.
        Coef = sim_rng.dirichlet(np.ones(Ncell)*5, Nsample).transpose()
        # Mu_train / Omega_train: (Ngene, Ncell) mean and standard deviation
        # used to draw X_train; the standard deviation is the constant `Noise`.
        Mu_train = sim_rng.normal(0, 2, size=(Ngene,Ncell))
        Omega_train = np.ones((Ngene,Ncell)) * Noise

        # X_train: (Nsample, Ngene, Ncell), one normal draw around Mu_train per sample.
        X_train = sim_rng.normal(Mu_train, Omega_train, size=(Nsample, Ngene, Ncell))
        # Y_train: (Ngene, Nsample); sample i is log(exp(X_train[i]) @ Coef[:, i] + 1),
        # i.e. the mixture is formed on the exponentiated scale and then logged.
        Y_train = np.zeros((Ngene, Nsample))
        for i in range(Nsample):
            Y_train[:,i] = np.log(np.dot(np.exp(X_train[i,:,:]), Coef[:,i])+1)


        Synthetic_data[name] = {
            'Coef': Coef,
            'Mu' : Mu_train,
            'X' : X_train,
            'Y' : Y_train,
            'Omega' : Omega_train
        }


# Persist the (possibly extended) cache.
with open(simfile, 'wb') as fp:
    pickle.dump(Synthetic_data, fp, protocol=pickle.HIGHEST_PROTOCOL)

## BLADE application
- Configuration of parameters
- Running BLADE

In [26]:
# Candidate hyperparameter values handed to BLADE, one list per hyperparameter.
pars = dict(
    Alpha=[1, 10],
    Alpha0=[0.1, 0.5, 1, 5, 10],
    Kappa0=[1, 0.5, 0.1],
    SigmaY=[0.1, 0.3, 0.5],
)

# Run controls forwarded to BLADE as the Nrep / Nrepfinal / Njob keyword
# arguments (Njob presumably sets the number of parallel workers -- confirm
# against the BLADE documentation).
Nrep, Nrepfinal, Njob = 3, 10, 10

In [28]:
# Run BLADE on every simulated dataset of the grid and pickle each outcome.
# NOTE(review): unlike the NNLS cell, this loop recomputes and overwrites an
# existing outcome file; BLADE runs take minutes, so consider guarding on
# os.path.exists(outfile) if downstream cells only read the pickles.
for Ncell, Ngene, Nsample, Noise in itertools.product(
    Ncells, Ngenes, Nsamples, Noises,
                ):

    # Same key format as used when the data were simulated.
    name = str(Ncell) +'_'+ str(Ngene) +'_'+ str(Nsample) +'_'+ str(Noise)
    outfile = '../data/BLADE_outcome_' + name + '.pickle'

    # Make sure the output directory exists before the (long) BLADE run, so
    # the result is not lost to a missing folder at write time.
    os.makedirs(os.path.dirname(outfile), exist_ok=True)

    print('creating ' + outfile)
    Y = Synthetic_data[name]['Y']
    mean = Synthetic_data[name]['Mu']
    sd = Synthetic_data[name]['Omega']

    # Boolean mask with exactly one entry per sample, flagging the first five
    # samples (or all of them when fewer than five exist). Clamping keeps the
    # mask length equal to Nsample even for Nsample < 5.
    N_flagged = min(5, Nsample)
    Ind_sample = [True]*N_flagged + [False]*(Nsample - N_flagged)
    # Every gene is flagged as a marker.
    Marker_Index = [True] * Ngene

    # Y is stored as log(mixture + 1); exp(Y) - 1 undoes that transform before
    # the data are handed to BLADE.
    final_obs, best_obs, best_set, outs = BLADE(
            mean, sd, np.exp(Y)-1, Marker_Index, Ind_sample,
            pars['Alpha'], pars['Alpha0'], pars['Kappa0'], pars['SigmaY'],
            Nrep=Nrep, Njob=Njob, Nrepfinal=Nrepfinal, fsel=0)

    # Store the outcome together with the hyperparameter grid that produced
    # it; the context manager guarantees the file is flushed and closed.
    with open(outfile, 'wb') as fp:
        pickle.dump(
                {
                    'final_obs': final_obs,
                    'best_obs': best_obs,
                    'best_set': best_set,
                    'outs' : outs,
                    'pars' : pars
                },
                fp
        )

creating ../data/BLADE_outcome_10_1000_20_0.1.pickle
all of 1000 genes are used for optimization.
Number of samples used: 5 out of 20 samples.
Initialization with Support vector regression


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done   4 out of  20 | elapsed:  2.0min remaining:  8.2min
[Parallel(n_jobs=10)]: Done   7 out of  20 | elapsed:  2.2min remaining:  4.2min
[Parallel(n_jobs=10)]: Done  10 out of  20 | elapsed:  2.3min remaining:  2.3min
[Parallel(n_jobs=10)]: Done  13 out of  20 | elapsed:  2.3min remaining:  1.2min
[Parallel(n_jobs=10)]: Done  16 out of  20 | elapsed:  2.3min remaining:   34.6s


KeyboardInterrupt: 

## Application of NNLS 

In [None]:
# Baseline: estimate the fractions of every simulated dataset with NNLS and
# pickle the estimates, skipping datasets whose outcome file already exists.
for key in Synthetic_data:
    # Keys have the form '<Ncell>_<Ngene>_<Nsample>_<Noise>'; split once.
    key_fields = key.split('_')
    Ncell = int(key_fields[0])
    Ngene = int(key_fields[1])
    Nsample = int(key_fields[2])
    Noise = float(key_fields[3])

    outfile = '../data/NNLS_outcome_' +key + '.pickle'

    if not os.path.exists(outfile):
        # Create the output directory on first use.
        os.makedirs(os.path.dirname(outfile), exist_ok=True)

        print('creating ' + outfile)
        Y = Synthetic_data[key]['Y']
        mean = Synthetic_data[key]['Mu']

        # The design matrix is identical for every sample, so build it once.
        # NOTE(review): Y was simulated as log(exp(X) @ Coef + 1), so exp(Y)-1
        # recovers the mixture exactly, whereas exp(mean)-1 (rather than
        # exp(mean)) also subtracts 1 from the signature -- confirm this is
        # the intended transform for the baseline.
        Signature_linear = np.exp(mean)-1

        # One column of estimated coefficients per sample, same shape as the
        # true 'Coef' matrix.
        NNLS_mat = np.zeros(Synthetic_data[key]['Coef'].shape)
        for i in range(Nsample):
            # nnls returns (solution, residual norm); keep only the solution.
            NNLS_mat[:,i] = nnls(Signature_linear, np.exp(Y[:,i])-1)[0]

        # The context manager guarantees the file is flushed and closed.
        with open(outfile, 'wb') as fp:
            pickle.dump(
                {
                    'NNLS': NNLS_mat
                },
                fp
            )