# Import libraries

In [15]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
import numpy as np
import numpy.matlib
import pandas as pd
import scanpy as sc
from scanpy.external.pp import magic
import scipy
from sklearn.metrics import adjusted_rand_score
from scipy.stats.mstats import zscore
from scipy.stats import pearsonr
import csv
import os
import matplotlib
matplotlib.use('TkAgg')
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from copy import deepcopy
import timeit
from joblib import Parallel, delayed
from collections import Counter
np.random.seed(0)

In [17]:
def files_MAST(adata, cell_type = '', outdir = ''):
    """Write one cell type's expression matrix and cell metadata to CSV for MAST.

    Selects the rows of ``adata`` whose ``obs['final_subclusters']`` equals
    ``cell_type`` and writes two files under ``DEA/<cell_type>/`` (relative to
    the current working directory):

    * ``adata_<cell_type>_exp_mat.csv`` -- ``.X`` of the subset, rows labelled
      by ``obs.index`` and columns by ``var.index``.
    * ``adata_<cell_type>_cdata.csv``   -- the matching rows of ``.obs``.

    NOTE: an identical ``files_MAST`` definition appears again in a later cell
    of this notebook and replaces this one once both cells have run; keep the
    two in sync (or delete one of them).

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``.obs`` (with a ``final_subclusters`` column),
        ``.var``, ``.X`` and row subsetting via ``adata[mask, :]``.
    cell_type : str
        Label to select; compared for exact equality (case-sensitive).
    outdir : str
        Currently unused and kept only for backward compatibility; output
        always goes under ``DEA/``.

    Raises
    ------
    ValueError
        If no row of ``adata`` carries the label ``cell_type``.
    """
    # Subset the object that was passed in. The previous version ignored the
    # argument and read the notebook-global `adata_final` instead.
    is_cell_type = adata.obs['final_subclusters'].isin([cell_type])

    # Copy only the selected rows instead of deep-copying the full dataset.
    adata_celltype = adata[is_cell_type, :].copy()
    print(cell_type + '\t'+"Dimensions" + "\t"+ "{}".format(adata_celltype.shape))

    # Fail fast on a misspelled label rather than writing empty CSV files.
    if adata_celltype.shape[0] == 0:
        raise ValueError(
            "No cells with final_subclusters == {!r}; nothing to export.".format(cell_type)
        )

    # Create the output directory (no error if it already exists).
    celltype_dir = 'DEA/' + cell_type
    os.makedirs(celltype_dir, exist_ok=True)

    # pandas cannot build a regular DataFrame from a sparse matrix, so densify
    # anything that exposes `toarray` (plain numpy arrays do not).
    matrix = adata_celltype.X
    if hasattr(matrix, 'toarray'):
        matrix = matrix.toarray()

    # make a dataframe for the expression matrix
    expmat = pd.DataFrame(matrix, columns = adata_celltype.var.index, index = adata_celltype.obs.index)

    # make a dataframe for the observation data
    cdata = pd.DataFrame(adata_celltype.obs)

    expmat.to_csv(celltype_dir + '/adata_' + cell_type + '_exp_mat.csv', index=True)
    cdata.to_csv(celltype_dir + '/adata_' + cell_type + '_cdata.csv', index=True)

    print("Files written for {}".format(cell_type))

In [18]:
def files_MAST(adata, cell_type = '', outdir = ''):
    """Write one cell type's expression matrix and cell metadata to CSV for MAST.

    Selects the rows of ``adata`` whose ``obs['final_subclusters']`` equals
    ``cell_type`` and writes two files under ``DEA/<cell_type>/`` (relative to
    the current working directory):

    * ``adata_<cell_type>_exp_mat.csv`` -- ``.X`` of the subset, rows labelled
      by ``obs.index`` and columns by ``var.index``.
    * ``adata_<cell_type>_cdata.csv``   -- the matching rows of ``.obs``.

    NOTE: this cell redefines ``files_MAST`` from the previous cell; once both
    cells have run, this later definition is the one in effect.

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``.obs`` (with a ``final_subclusters`` column),
        ``.var``, ``.X`` and row subsetting via ``adata[mask, :]``.
    cell_type : str
        Label to select; compared for exact equality (case-sensitive).
    outdir : str
        Currently unused and kept only for backward compatibility; output
        always goes under ``DEA/``.

    Raises
    ------
    ValueError
        If no row of ``adata`` carries the label ``cell_type``.
    """
    # Subset the object that was passed in. The previous version ignored the
    # argument and read the notebook-global `adata_final` instead.
    is_cell_type = adata.obs['final_subclusters'].isin([cell_type])

    # Copy only the selected rows instead of deep-copying the full dataset.
    adata_celltype = adata[is_cell_type, :].copy()
    print(cell_type + '\t'+"Dimensions" + "\t"+ "{}".format(adata_celltype.shape))

    # Fail fast on a misspelled label rather than writing empty CSV files.
    if adata_celltype.shape[0] == 0:
        raise ValueError(
            "No cells with final_subclusters == {!r}; nothing to export.".format(cell_type)
        )

    # Create the output directory (no error if it already exists).
    celltype_dir = 'DEA/' + cell_type
    os.makedirs(celltype_dir, exist_ok=True)

    # pandas cannot build a regular DataFrame from a sparse matrix, so densify
    # anything that exposes `toarray` (plain numpy arrays do not).
    matrix = adata_celltype.X
    if hasattr(matrix, 'toarray'):
        matrix = matrix.toarray()

    # make a dataframe for the expression matrix
    expmat = pd.DataFrame(matrix, columns = adata_celltype.var.index, index = adata_celltype.obs.index)

    # make a dataframe for the observation data
    cdata = pd.DataFrame(adata_celltype.obs)

    expmat.to_csv(celltype_dir + '/adata_' + cell_type + '_exp_mat.csv', index=True)
    cdata.to_csv(celltype_dir + '/adata_' + cell_type + '_cdata.csv', index=True)

    print("Files written for {}".format(cell_type))

In [19]:
def run_MAST_for_celltype(adata, cell_type):
    """Export the data for one cell type and run the ``./runMAST.R`` script on it.

    First calls ``files_MAST`` to write the expression-matrix and cell-metadata
    CSV files under ``DEA/<cell_type>/``, then launches ``./runMAST.R`` with
    four positional arguments: the two input CSV paths followed by the
    ``..._cort.csv`` and ``..._bs.csv`` output paths in the same directory.

    Parameters
    ----------
    adata : AnnData-like
        Dataset forwarded to ``files_MAST``.
    cell_type : str
        Label forwarded to ``files_MAST``; also used to build all file paths.

    Returns
    -------
    str
        A "Finished running MAST on..." message including the elapsed
        wall-clock seconds (export plus script run).

    Raises
    ------
    subprocess.CalledProcessError
        If the R script exits with a non-zero status. (The previous
        ``os.system`` call discarded the status and always reported success.)
    """
    # Local import: `subprocess` is not among the notebook's top-level imports.
    import subprocess

    start_time = timeit.default_timer()
    MAST = './runMAST.R'

    # Export the CSV inputs from the dataset that was passed in. The previous
    # version ignored `adata` and always used the notebook-global `adata_final`.
    files_MAST(adata = adata, cell_type = cell_type)

    expmat_Inputfile = ('DEA/'+ cell_type + '/adata_'+ cell_type+ '_exp_mat.csv')
    cdata_Inputfile = ('DEA/'+ cell_type + '/adata_'+ cell_type+ '_cdata.csv')
    output_file_cort = 'DEA/' +  cell_type + "/MAST_DEA_" + cell_type + "_cort.csv"
    output_file_bs = 'DEA/' +  cell_type + "/MAST_DEA_" + cell_type + "_bs.csv"

    # Pass arguments as a list (no shell), so a label containing spaces or
    # shell metacharacters cannot split or alter the command.
    command = [MAST, expmat_Inputfile, cdata_Inputfile, output_file_cort, output_file_bs]
    print("Running " + " ".join(command))

    # check=True surfaces a failed R run instead of silently continuing.
    subprocess.run(command, check=True)

    out_str = "Finished running MAST on"+ "( " + cell_type +  ")" +" in {} seconds.".format(timeit.default_timer()-start_time)

    return out_str

# Populate input parameters

In [20]:
# Run configuration.
# NOTE(review): the original note said "populate from command line", but the
# values below are hard-coded. In the visible part of this notebook only
# `outdir` is read (when loading the AnnData file); `samples`, `indir` and
# `figures` are not referenced by the cells shown here.
samples = ['1819_C1029_Ctrl_ES_Cort_IGO_10837_23', '1820_C1029_Ctrl_ES_BS_IGO_10856_3', '1821_996_VE_ES_Cort_IGO_10875_6', '1822_996_VE_ES_BS_IGO_10875_7']
indir = './input/'
outdir = './output_Nov/'
figures = './figures/'

## Prepare data for MAST

In [21]:
# Load the AnnData object from `outdir`; every run_MAST_for_celltype call below
# is given this object to subset by cell type.
adata_final = sc.read_h5ad(outdir+'adata_final_high_var_final_clusters.h5ad')

### Oligodendrocytes

In [9]:
# Export the "Oligodendrocytes" cells and run MAST; the returned timing message is the cell output.
run_MAST_for_celltype(adata_final, "Oligodendrocytes")

Oligodendrocytes	Dimensions	(4890, 18947)
Files written for Oligodendrocytes
Running ./runMAST.R DEA/Oligodendrocytes/adata_Oligodendrocytes_exp_mat.csv DEA/Oligodendrocytes/adata_Oligodendrocytes_cdata.csv DEA/Oligodendrocytes/MAST_DEA_Oligodendrocytes_cort.csv DEA/Oligodendrocytes/MAST_DEA_Oligodendrocytes_bs.csv


'Finished running MAST on( Oligodendrocytes) in 1212.92347419 seconds.'

### Microglia

In [8]:
# Export the "microglia" cells and run MAST. The label is lowercase here, unlike the
# other cell types; files_MAST matches labels exactly via isin([...]), so keep the case as is.
run_MAST_for_celltype(adata_final, "microglia")

  res = method(*args, **kwargs)


microglia	Dimensions	(697, 18947)
Files written for microglia
Running ./runMAST.R DEA/microglia/adata_microglia_exp_mat.csv DEA/microglia/adata_microglia_cdata.csv DEA/microglia/MAST_DEA_microglia_cort.csv DEA/microglia/MAST_DEA_microglia_bs.csv


'Finished running MAST on( microglia) in 389.58715040799996 seconds.'

### Astrocytes

In [10]:
# Export the "Astrocytes" cells and run MAST; the returned timing message is the cell output.
run_MAST_for_celltype(adata_final, "Astrocytes")

Astrocytes	Dimensions	(1716, 18947)
Files written for Astrocytes
Running ./runMAST.R DEA/Astrocytes/adata_Astrocytes_exp_mat.csv DEA/Astrocytes/adata_Astrocytes_cdata.csv DEA/Astrocytes/MAST_DEA_Astrocytes_cort.csv DEA/Astrocytes/MAST_DEA_Astrocytes_bs.csv


'Finished running MAST on( Astrocytes) in 570.1602463660001 seconds.'

### OPC

In [14]:
# Export the "OPC" cells and run MAST; the returned timing message is the cell output.
run_MAST_for_celltype(adata_final, "OPC")

OPC	Dimensions	(496, 18947)
Files written for OPC
Running ./runMAST.R DEA/OPC/adata_OPC_exp_mat.csv DEA/OPC/adata_OPC_cdata.csv DEA/OPC/MAST_DEA_OPC_cort.csv DEA/OPC/MAST_DEA_OPC_bs.csv


'Finished running MAST on( OPC) in 337.31309115399927 seconds.'

### Excitatory_neurons

In [11]:
# Export the "Excitatory_neurons" cells and run MAST; the returned timing message is the cell output.
run_MAST_for_celltype(adata_final, "Excitatory_neurons")

Excitatory_neurons	Dimensions	(7232, 18947)
Files written for Excitatory_neurons
Running ./runMAST.R DEA/Excitatory_neurons/adata_Excitatory_neurons_exp_mat.csv DEA/Excitatory_neurons/adata_Excitatory_neurons_cdata.csv DEA/Excitatory_neurons/MAST_DEA_Excitatory_neurons_cort.csv DEA/Excitatory_neurons/MAST_DEA_Excitatory_neurons_bs.csv


'Finished running MAST on( Excitatory_neurons) in 1769.1670335070012 seconds.'

### Inhibitory_neurons (interneurons)

In [12]:
# Export the "Inhibitory_neurons" cells and run MAST; the returned timing message is the cell output.
run_MAST_for_celltype(adata_final, "Inhibitory_neurons")

Inhibitory_neurons	Dimensions	(6821, 18947)
Files written for Inhibitory_neurons
Running ./runMAST.R DEA/Inhibitory_neurons/adata_Inhibitory_neurons_exp_mat.csv DEA/Inhibitory_neurons/adata_Inhibitory_neurons_cdata.csv DEA/Inhibitory_neurons/MAST_DEA_Inhibitory_neurons_cort.csv DEA/Inhibitory_neurons/MAST_DEA_Inhibitory_neurons_bs.csv


'Finished running MAST on( Inhibitory_neurons) in 1725.2382228970018 seconds.'

### Vascular_Fibro_cells

In [13]:
# Export the "Vascular_Fibro_cells" cells and run MAST; the returned timing message is the cell output.
run_MAST_for_celltype(adata_final, "Vascular_Fibro_cells") 

Vascular_Fibro_cells	Dimensions	(1165, 18947)
Files written for Vascular_Fibro_cells
Running ./runMAST.R DEA/Vascular_Fibro_cells/adata_Vascular_Fibro_cells_exp_mat.csv DEA/Vascular_Fibro_cells/adata_Vascular_Fibro_cells_cdata.csv DEA/Vascular_Fibro_cells/MAST_DEA_Vascular_Fibro_cells_cort.csv DEA/Vascular_Fibro_cells/MAST_DEA_Vascular_Fibro_cells_bs.csv


'Finished running MAST on( Vascular_Fibro_cells) in 477.68334943100126 seconds.'