In [1]:
import pandas as pd
import pathlib
from papermill import execute_notebook

In [2]:
# Cell class(es) whose subtypes are processed by this notebook.
cell_class = ['Exc']
# Directory used as both the destination of the executed notebooks and the
# working directory (cwd) of the papermill runs below.
output_dir = '/home/hanliu/project/mouse_rostral_brain/study/ClustersEnsemble/ExcSubtypeEnsemble'
# parents=True also creates any missing intermediate directories; without it
# mkdir raises FileNotFoundError when e.g. 'ClustersEnsemble' does not exist yet.
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

## Cell Meta

In [3]:
# Load the table of clustering results (one msgpack file).
# NOTE(review): pd.read_msgpack is deprecated (the run below emits a warning
# recommending pyarrow) and is no longer available in pandas >= 1.0; convert
# the file to another format before upgrading pandas.
cell_meta_path = '/home/hanliu/project/mouse_rostral_brain/study/ClusteringSummary/Summary/TotalClusteringResults.msg'
cell_tidy_data = pd.read_msgpack(cell_meta_path)

# Show the distinct MajorType labels present in the table.
cell_tidy_data['MajorType'].unique()

It is recommended to use pyarrow for on-the-wire transmission of pandas objects.
  exec(code_obj, self.user_global_ns, self.user_ns)


array(['MGE-Sst', 'CA3', 'CA1', 'CA3-St18', 'Unc5c', 'Gfra1', 'ODC', 'PC',
       'ANP', 'IT-L5', 'NP-L6', 'CGE-Lamp5', 'CT-L6', 'IG-CA2', 'DG-po',
       'DG', 'CGE-Vip', 'OPC', 'ASC', 'MGC', 'PAL-Inh', 'PT-L5',
       'MGE-Pvalb', 'VLMC', 'EC', 'VLMC-Pia', 'OLF', 'MSN-D2', 'L6b',
       'IT-L6', 'IT-L23', 'IT-L4', 'OLF-Exc', 'CLA', 'Foxp2', 'MSN-D1',
       'LSX-Inh', 'D1L-Fstl4', 'EP', 'D1L-PAL', 'Chd7'], dtype=object)

## Select subtypes of the chosen cell class (excluding outliers)

In [4]:
# Rows whose CellClass is one of the classes configured in `cell_class`.
in_selected_class = cell_tidy_data['CellClass'].isin(cell_class)
subtype_labels = cell_tidy_data.loc[in_selected_class, 'SubType'].unique()

# Keep every SubType label that does not contain 'Outlier', in sorted order.
use_clusters = sorted(label for label in subtype_labels
                      if 'Outlier' not in label)
use_clusters

['CA1 Ak5',
 'CA1 Chrm3',
 'CA1 Kif26a',
 'CA1 Lingo2',
 'CA1 Ptprg',
 'CA3 Cadm2',
 'CA3 Efnb2',
 'CA3-St18 Epha5',
 'CA3-St18 Nuak1',
 'CA3-St18 Tead1',
 'CLA Bcl11a',
 'CLA Cdh8',
 'CLA Nrp2',
 'CT-L6 Hcrtr2',
 'CT-L6 Il1rap',
 'CT-L6 Map4',
 'CT-L6 Megf9',
 'DG dg-all',
 'DG-po Bcl11a',
 'DG-po Calb2',
 'DG-po Kctd8',
 'EP Adcy8',
 'EP Rgs8',
 'EP Tspan5',
 'Gfra1 Gfra1',
 'IG-CA2 Chrm3',
 'IG-CA2 Peak1',
 'IG-CA2 Xpr1',
 'IT-L23 Cux1',
 'IT-L23 Foxp1',
 'IT-L23 Ptprt',
 'IT-L23 Tenm2',
 'IT-L4 Astn2',
 'IT-L4 Shc3',
 'IT-L5 Cdh8',
 'IT-L5 Etv1',
 'IT-L5 Grik3',
 'IT-L6 Cadps2',
 'IT-L6 Fstl4',
 'IT-L6 Man1c1',
 'IT-L6 Oxr1',
 'L6b Adcy8',
 'L6b Kcnk2',
 'L6b Nrp2',
 'L6b Pkhd1',
 'NP-L6 Boc',
 'NP-L6 Cntnap4',
 'NP-L6 Cntnap5a',
 'NP-L6 Cyp7b1',
 'NP-L6 Kcnab1',
 'NP-L6 Olfml2b',
 'OLF-Exc Bmpr1b',
 'OLF-Exc Cdh9',
 'OLF-Exc Cux2',
 'OLF-Exc Lrrtm3',
 'OLF-Exc Pld5',
 'OLF-Exc Rmst',
 'OLF-Exc Sgcd',
 'OLF-Exc Unc13c',
 'PT-L5 Abca12',
 'PT-L5 Astn2',
 'PT-L5 Kcnh1',
 'PT-L5 Necti

## Step1 Select DMG

In [5]:
# Step 1: execute the 'related_dmgs.ipynb' template with papermill and store
# the executed copy in output_dir (which is also the run's working directory).
input_path = 'related_dmgs.ipynb'
output_path = f'{output_dir}/related_dmgs.ipynb'

# Values injected into the template notebook.
parameters = {
    'auroc_cutoff': 0.85,
    'use_clusters': use_clusters,
}

# Trailing ';' keeps the returned notebook object out of the cell output.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=output_dir,
);

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))




## Step2 Select DMR

In [6]:
# Step 2: execute the 'related_dmrs.ipynb' template with papermill and store
# the executed copy in output_dir (which is also the run's working directory).
input_path = 'related_dmrs.ipynb'
output_path = f'{output_dir}/related_dmrs.ipynb'

# Values injected into the template notebook. Cluster names are passed with
# spaces replaced by underscores.
parameters = {
    'use_clusters': [name.replace(' ', '_') for name in use_clusters],
    'hypo_dmr_hits_path':
    '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/TotalHits.HypoDMR.h5ad',
}

# Trailing ';' keeps the returned notebook object out of the cell output.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=output_dir,
);

HBox(children=(IntProgress(value=0, max=19), HTML(value='')))




## Step3 Select corr

In [7]:
# Step 3: execute the 'related_corr.ipynb' template with papermill and store
# the executed copy in output_dir (which is also the run's working directory).
input_path = 'related_corr.ipynb'
output_path = f'{output_dir}/related_corr.ipynb'

# Values injected into the template notebook.
# NOTE(review): distance_cutoff is presumably in base pairs - confirm in the template.
parameters = {
    'corr_cutoff': 0.5,
    'distance_cutoff': 500000,
}

# Trailing ';' keeps the returned notebook object out of the cell output.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=output_dir,
);

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))




## Step4 intersect everything

In [8]:
# Step 4: execute the 'intersect_everything.ipynb' template with papermill and
# store the executed copy in output_dir (also the run's working directory).
input_path = 'intersect_everything.ipynb'
output_path = f'{output_dir}/intersect_everything.ipynb'

# Values injected into the template notebook.
parameters = {'delta_dmr_rate_cutoff': 0.3}

# Trailing ';' keeps the returned notebook object out of the cell output.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=output_dir,
);

## Motif enrichment

In [9]:
# Sub-directory of output_dir that holds the motif-enrichment notebooks.
motif_enrichment_dir = pathlib.Path(output_dir, 'MotifEnrichment')
# Create it if needed; an already existing directory is not an error.
motif_enrichment_dir.mkdir(exist_ok=True)

In [None]:
# Run the 'MotifEnrichment.ipynb' template once per cluster. Each executed
# copy is written to motif_enrichment_dir as 'MotifEnrichment.<cluster>.ipynb'.
# Cluster names are used with spaces replaced by underscores.
_use_clusters = [name.replace(' ', '_') for name in use_clusters]

for cluster in _use_clusters:
    # Progress marker: name of the cluster about to be processed.
    print(cluster)

    input_path = 'MotifEnrichment.ipynb'
    output_path = f'{motif_enrichment_dir}/MotifEnrichment.{cluster}.ipynb'

    # Values injected into the template: the current cluster, the full cluster
    # list, and the cutoffs used inside the template.
    parameters = {
        'cluster': cluster,
        'use_clusters': _use_clusters,
        'or_cutoff': 1.6,
        'neg_lgp_cutoff': 10,
        'mask_quantile_to_max': 0.8,
    }

    execute_notebook(
        input_path,
        output_path,
        parameters=parameters,
        engine_name=None,
        prepare_only=False,
        kernel_name=None,
        progress_bar=True,
        log_output=False,
        start_timeout=60,
        report_mode=False,
        cwd=str(motif_enrichment_dir),
    )
    

CA1_Ak5


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA1_Chrm3


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA1_Kif26a


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA1_Lingo2


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA1_Ptprg


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA3_Cadm2


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA3_Efnb2


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA3-St18_Epha5


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA3-St18_Nuak1


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA3-St18_Tead1


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CLA_Bcl11a


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CLA_Cdh8


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CLA_Nrp2


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CT-L6_Hcrtr2


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CT-L6_Il1rap


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))

### Aggregate motif enrichment

In [None]:
# Execute the 'aggregate_motif_enrichment.ipynb' template with papermill; the
# executed copy is written into motif_enrichment_dir, which is also the run's
# working directory.
input_path = 'aggregate_motif_enrichment.ipynb'
output_path = f'{motif_enrichment_dir}/aggregate_motif_enrichment.ipynb'

# Values injected into the template notebook.
# NOTE(review): oddsratio_cutoff (1.8) differs from the or_cutoff (1.6) used by
# the per-cluster runs - confirm this is intentional.
parameters = {
    'oddsratio_cutoff': 1.8,
    'lgp_cutoff': 10,
    'motif_enrichment_dir': str(motif_enrichment_dir),
    'tf_class_level': 'SubFamily',
}

# Trailing ';' keeps the returned notebook object out of the cell output.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=str(motif_enrichment_dir),
);