In [1]:
import pandas as pd
import pathlib
from papermill import execute_notebook

In [2]:
# Major cell types whose subtypes make up this ensemble.
major_types = ['CA3', 'CA1']

# Every executed notebook copy produced below is written into this directory.
output_dir = '/home/hanliu/project/mouse_rostral_brain/study/ClustersEnsemble//CA1CA3SubtypeEnsemble'

# parents=True: also create missing intermediate directories, so a fresh
# checkout does not fail with FileNotFoundError when the parent folder of
# output_dir has not been created yet. exist_ok=True keeps re-runs harmless.
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

## Cell Meta

In [3]:
# Per-cell clustering summary table; the 'MajorType' and 'SubType' columns are
# what the cells below rely on.
# NOTE(review): pd.read_msgpack printed the deprecation message recorded in this
# cell's output, and later pandas releases no longer ship it — confirm the
# pinned pandas version before re-running.
cell_meta_path = '/home/hanliu/project/mouse_rostral_brain/study/ClusteringSummary/Summary/TotalClusteringResults.msg'
cell_tidy_data = pd.read_msgpack(cell_meta_path)

# Show which major-type labels are present in the table.
cell_tidy_data['MajorType'].unique()

It is recommended to use pyarrow for on-the-wire transmission of pandas objects.
  exec(code_obj, self.user_global_ns, self.user_ns)


array(['MGE-Sst', 'CA3', 'CA1', 'CA3-St18', 'Unc5c', 'Gfra1', 'ODC', 'PC',
       'ANP', 'IT-L5', 'NP-L6', 'CGE-Lamp5', 'CT-L6', 'IG-CA2', 'DG-po',
       'DG', 'CGE-Vip', 'OPC', 'ASC', 'MGC', 'PAL-Inh', 'PT-L5',
       'MGE-Pvalb', 'VLMC', 'EC', 'VLMC-Pia', 'OLF', 'MSN-D2', 'L6b',
       'IT-L6', 'IT-L23', 'IT-L4', 'OLF-Exc', 'CLA', 'Foxp2', 'MSN-D1',
       'LSX-Inh', 'D1L-Fstl4', 'EP', 'D1L-PAL', 'Chd7'], dtype=object)

## Select significant subtypes of CA3 and CA1

In [4]:
# Rows belonging to one of the requested major types.
is_target_major = cell_tidy_data['MajorType'].isin(major_types)
subtype_names = cell_tidy_data.loc[is_target_major, 'SubType'].unique()

# Alphabetically sorted subtype names, skipping any whose name contains 'Outlier'.
use_clusters = sorted(name for name in subtype_names if 'Outlier' not in name)
use_clusters

['CA1 Ak5',
 'CA1 Chrm3',
 'CA1 Kif26a',
 'CA1 Lingo2',
 'CA1 Ptprg',
 'CA3 Cadm2',
 'CA3 Efnb2']

## Step1 Select DMG

In [5]:
# Step 1: execute the DMG-selection template notebook for the chosen clusters.
input_path = 'related_dmgs.ipynb'
output_path = f'{output_dir}/related_dmgs.ipynb'

# Values injected into the template notebook by papermill.
parameters = {
    'auroc_cutoff': 0.85,
    'use_clusters': use_clusters,
}

# The executed copy is written to output_path and run with output_dir as its
# working directory.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=output_dir,
)
# Ending the cell on a statement keeps the call's return value out of the output.
pass

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))




## Step2 Select DMR

In [6]:
# Step 2: execute the DMR-selection template notebook.
input_path = 'related_dmrs.ipynb'
output_path = f'{output_dir}/related_dmrs.ipynb'

# Cluster names are passed with spaces replaced by underscores, together with
# the path of the hypo-DMR hits file.
parameters = {
    'use_clusters': [name.replace(' ', '_') for name in use_clusters],
    'hypo_dmr_hits_path': '/home/hanliu/project/mouse_rostral_brain/DMR/SubType/Total/TotalHits.HypoDMR.h5ad',
}

# The executed copy is written to output_path and run with output_dir as its
# working directory.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=output_dir,
)
# Ending the cell on a statement keeps the call's return value out of the output.
pass

HBox(children=(IntProgress(value=0, max=19), HTML(value='')))




## Step3 Select corr

In [7]:
# Step 3: execute the correlation-selection template notebook.
input_path = 'related_corr.ipynb'
output_path = f'{output_dir}/related_corr.ipynb'

# Cutoffs injected into the template notebook by papermill.
parameters = {
    'corr_cutoff': 0.35,
    'distance_cutoff': 500000,
}

# The executed copy is written to output_path and run with output_dir as its
# working directory.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=output_dir,
)
# Ending the cell on a statement keeps the call's return value out of the output.
pass

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))




## Step4 intersect everything

In [8]:
# Step 4: execute the template notebook that intersects the previous results.
input_path = 'intersect_everything.ipynb'
output_path = f'{output_dir}/intersect_everything.ipynb'

# Cutoff injected into the template notebook by papermill.
parameters = {'delta_dmr_rate_cutoff': 0.3}

# The executed copy is written to output_path and run with output_dir as its
# working directory.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=output_dir,
)
# Ending the cell on a statement keeps the call's return value out of the output.
pass

HBox(children=(IntProgress(value=0, max=16), HTML(value='')))




## Motif enrichment

In [6]:
# Sub-directory of output_dir that receives every motif-enrichment notebook.
motif_enrichment_dir = pathlib.Path(output_dir, 'MotifEnrichment')
motif_enrichment_dir.mkdir(exist_ok=True)

In [14]:
# Cluster names with spaces replaced by underscores, as passed to the template.
_use_clusters = [name.replace(' ', '_') for name in use_clusters]

# Execute the motif-enrichment template once per cluster; each run gets its own
# output notebook named after the cluster.
for cluster in _use_clusters:
    print(cluster)

    input_path = 'MotifEnrichment.ipynb'
    output_path = f'{motif_enrichment_dir}/MotifEnrichment.{cluster}.ipynb'
    parameters = {
        'cluster': cluster,
        'use_clusters': _use_clusters,
        'or_cutoff': 1.3,
        'neg_lgp_cutoff': 10,
    }

    # Each run uses the motif-enrichment directory as its working directory.
    execute_notebook(
        input_path,
        output_path,
        parameters=parameters,
        engine_name=None,
        prepare_only=False,
        kernel_name=None,
        progress_bar=True,
        log_output=False,
        start_timeout=60,
        report_mode=False,
        cwd=str(motif_enrichment_dir),
    )
    

CA1_Ak5


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA1_Chrm3


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA1_Kif26a


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA1_Lingo2


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA1_Ptprg


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA3_Cadm2


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))


CA3_Efnb2


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))




In [7]:
# Execute the template notebook that aggregates the per-cluster motif results.
input_path = 'aggregate_motif_enrichment.ipynb'
output_path = f'{motif_enrichment_dir}/aggregate_motif_enrichment.ipynb'

# Settings injected into the template notebook by papermill; the directory is
# passed as a plain string.
parameters = {
    'oddsratio_cutoff': 1.8,
    'lgp_cutoff': 10,
    'motif_enrichment_dir': str(motif_enrichment_dir),
    'tf_class_level': 'SubFamily',
}

# The executed copy is written to output_path and run with the
# motif-enrichment directory as its working directory.
execute_notebook(
    input_path,
    output_path,
    parameters=parameters,
    engine_name=None,
    prepare_only=False,
    kernel_name=None,
    progress_bar=True,
    log_output=False,
    start_timeout=60,
    report_mode=False,
    cwd=str(motif_enrichment_dir),
)
# Ending the cell on a statement keeps the call's return value out of the output.
pass

HBox(children=(IntProgress(value=0, max=24), HTML(value='')))


